diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 36af6c98d18b..c1773258554c 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -134,7 +134,8 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB,
             : nullptr),
       GetClangTidyOptions(Opts.GetClangTidyOptions),
       SuggestMissingIncludes(Opts.SuggestMissingIncludes),
-      TweakFilter(Opts.TweakFilter), WorkspaceRoot(Opts.WorkspaceRoot),
+      BuildRecoveryAST(Opts.BuildRecoveryAST), TweakFilter(Opts.TweakFilter),
+      WorkspaceRoot(Opts.WorkspaceRoot),
       // Pass a callback into `WorkScheduler` to extract symbols from a newly
       // parsed file and rebuild the file index synchronously each time an AST
       // is parsed.
@@ -191,6 +192,7 @@ void ClangdServer::addDocument(PathRef File, llvm::StringRef Contents,
   Inputs.ForceRebuild = ForceRebuild;
   Inputs.Opts = std::move(Opts);
   Inputs.Index = Index;
+  Inputs.Opts.BuildRecoveryAST = BuildRecoveryAST;
   bool NewFile = WorkScheduler.update(File, Inputs, WantDiags);
   // If we loaded Foo.h, we want to make sure Foo.cpp is indexed.
   if (NewFile && BackgroundIdx)
@@ -269,9 +271,13 @@ void ClangdServer::signatureHelp(PathRef File, Position Pos,
     if (!IP)
       return CB(IP.takeError());

-    auto PreambleData = IP->Preamble;
-    CB(clangd::signatureHelp(File, IP->Command, PreambleData, IP->Contents, Pos,
-                             FS, Index));
+    const auto *PreambleData = IP->Preamble;
+    if (!PreambleData)
+      return CB(llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                        "Failed to parse includes"));
+
+    CB(clangd::signatureHelp(File, IP->Command, *PreambleData, IP->Contents,
+                             Pos, FS, Index));
   };

   // Unlike code completion, we wait for an up-to-date preamble here.
diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
index a0659c7c3d22..f1e981e6c14f 100644
--- a/clang-tools-extra/clangd/ClangdServer.h
+++ b/clang-tools-extra/clangd/ClangdServer.h
@@ -118,6 +118,9 @@ class ClangdServer {
     /// enabled.
     ClangTidyOptionsBuilder GetClangTidyOptions;

+    /// If true, turn on the `-frecovery-ast` clang flag.
+    bool BuildRecoveryAST = false;
+
     /// Clangd's workspace root. Relevant for "workspace" operations not bound
     /// to a particular file.
     /// FIXME: If not set, should use the current working directory.
@@ -345,6 +348,9 @@ class ClangdServer {
   // can be caused by missing includes (e.g. member access in incomplete type).
   bool SuggestMissingIncludes = false;

+  // If true, preserve expressions in AST for broken code.
+  bool BuildRecoveryAST = false;
+
   std::function<bool(const Tweak &)> TweakFilter;

   // GUARDED_BY(CachedCompletionFuzzyFindRequestMutex)
diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 344b90ecaa32..b544510ecea1 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -1022,7 +1022,7 @@ class SignatureHelpCollector final : public CodeCompleteConsumer {
 struct SemaCompleteInput {
   PathRef FileName;
   const tooling::CompileCommand &Command;
-  const PreambleData *Preamble;
+  const PreambleData &Preamble;
   llvm::StringRef Contents;
   size_t Offset;
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS;
@@ -1054,8 +1054,8 @@ bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
                       IncludeStructure *Includes = nullptr) {
   trace::Span Tracer("Sema completion");
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = Input.VFS;
-  if (Input.Preamble && Input.Preamble->StatCache)
-    VFS = Input.Preamble->StatCache->getConsumingFS(std::move(VFS));
+  if (Input.Preamble.StatCache)
+    VFS = Input.Preamble.StatCache->getConsumingFS(std::move(VFS));
   ParseInputs ParseInput;
   ParseInput.CompileCommand = Input.Command;
   ParseInput.FS = VFS;
@@ -1072,6 +1072,10 @@ bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
   FrontendOpts.SkipFunctionBodies = true;
   // Disable typo correction in Sema.
   CI->getLangOpts()->SpellChecking = false;
+  // Code completion won't trigger in delayed template bodies.
+  // This is on by default on Windows to allow parsing SDK headers; we're only
+  // disabling it for the main file (not preamble).
+  CI->getLangOpts()->DelayedTemplateParsing = false;
   // Setup code completion.
   FrontendOpts.CodeCompleteOpts = Options;
   FrontendOpts.CodeCompletionAt.FileName = std::string(Input.FileName);
@@ -1095,9 +1099,7 @@ bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
   // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise
   // the remapped buffers do not get freed.
   auto Clang = prepareCompilerInstance(
-      std::move(CI),
-      (Input.Preamble && !CompletingInPreamble) ? &Input.Preamble->Preamble
-                                                : nullptr,
+      std::move(CI), !CompletingInPreamble ? &Input.Preamble.Preamble : nullptr,
       std::move(ContentsBuffer), std::move(VFS), IgnoreDiags);
   Clang->getPreprocessorOpts().SingleFileParseMode = CompletingInPreamble;
   Clang->setCodeCompletionConsumer(Consumer.release());
@@ -1114,8 +1116,7 @@ bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
   //  - but Sema code complete won't see them: as part of the preamble, they're
   //    deserialized only when mentioned.
   // Force them to be deserialized so SemaCodeComplete sees them.
-  if (Input.Preamble)
-    loadMainFilePreambleMacros(Clang->getPreprocessor(), *Input.Preamble);
+  loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble);
   if (Includes)
     Clang->getPreprocessor().addPPCallbacks(
         collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
@@ -1754,12 +1755,12 @@ codeComplete(PathRef FileName, const tooling::CompileCommand &Command,
   return (!Preamble || Opts.RunParser == CodeCompleteOptions::NeverParse) ?
             std::move(Flow).runWithoutSema(Contents, *Offset, VFS)
           : std::move(Flow).run(
-                {FileName, Command, Preamble, Contents, *Offset, VFS});
+                {FileName, Command, *Preamble, Contents, *Offset, VFS});
 }

 SignatureHelp signatureHelp(PathRef FileName,
                             const tooling::CompileCommand &Command,
-                            const PreambleData *Preamble,
+                            const PreambleData &Preamble,
                             llvm::StringRef Contents, Position Pos,
                             llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
                             const SymbolIndex *Index) {
diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h
index df06c156049f..3adea47c89a1 100644
--- a/clang-tools-extra/clangd/CodeComplete.h
+++ b/clang-tools-extra/clangd/CodeComplete.h
@@ -276,7 +276,7 @@ CodeCompleteResult codeComplete(PathRef FileName,
 /// Get signature help at a specified \p Pos in \p FileName.
 SignatureHelp signatureHelp(PathRef FileName,
                             const tooling::CompileCommand &Command,
-                            const PreambleData *Preamble, StringRef Contents,
+                            const PreambleData &Preamble, StringRef Contents,
                             Position Pos,
                             IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
                             const SymbolIndex *Index);
diff --git a/clang-tools-extra/clangd/Compiler.h b/clang-tools-extra/clangd/Compiler.h
index ef5386bb0d17..b7cc174455f3 100644
--- a/clang-tools-extra/clangd/Compiler.h
+++ b/clang-tools-extra/clangd/Compiler.h
@@ -38,6 +38,7 @@ class IgnoreDiagnostics : public DiagnosticConsumer {
 struct ParseOptions {
   tidy::ClangTidyOptions ClangTidyOpts;
   bool SuggestMissingIncludes = false;
+  bool BuildRecoveryAST = false;
 };

 /// Information required to run clang, e.g. to parse AST or do code completion.
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 1d6997f0b4d4..2c7cb5d2b85d 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -253,6 +253,10 @@ ParsedAST::build(llvm::StringRef Version,
   const PrecompiledPreamble *PreamblePCH =
       Preamble ? &Preamble->Preamble : nullptr;

+  // Recovery expression currently only works for C++.
+  if (CI->getLangOpts()->CPlusPlus)
+    CI->getLangOpts()->RecoveryAST = Opts.BuildRecoveryAST;
+
   StoreDiags ASTDiags;
   std::string Content = std::string(Buffer->getBuffer());
   std::string Filename =
diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp
index fdee71fd2244..48f15420032f 100644
--- a/clang-tools-extra/clangd/Preamble.cpp
+++ b/clang-tools-extra/clangd/Preamble.cpp
@@ -132,6 +132,10 @@ buildPreamble(PathRef FileName, CompilerInvocation &CI,
   // to read back. We rely on dynamic index for the comments instead.
   CI.getPreprocessorOpts().WriteCommentListToPCH = false;

+  // Recovery expression currently only works for C++.
+  if (CI.getLangOpts()->CPlusPlus)
+    CI.getLangOpts()->RecoveryAST = Inputs.Opts.BuildRecoveryAST;
+
   CppFilePreambleCallbacks SerializedDeclsCollector(FileName, PreambleCallback);
   if (Inputs.FS->setCurrentWorkingDirectory(Inputs.CompileCommand.Directory)) {
     log("Couldn't set working directory when building the preamble.");
diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index 77b2cbce40d9..59af922d4005 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -520,6 +520,7 @@ llvm::StringRef toSemanticTokenType(HighlightingKind Kind) {
   case HighlightingKind::InactiveCode:
     return "comment";
   }
+  llvm_unreachable("unhandled HighlightingKind");
 }

 std::vector<SemanticToken>
diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/launch.json b/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/launch.json
index cd6b87bd05c0..7d414bc00f32 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/launch.json
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/launch.json
@@ -1,4 +1,4 @@
-// A launch configuration that compiles the extension and then opens it inside a new window
+// A launch configuration that compiles the extension and opens it inside a new window.
 {
     "version": "0.1.0",
     "configurations": [
diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/tasks.json b/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/tasks.json
index fb7f662e14d1..65b1c9598c0e 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/tasks.json
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/.vscode/tasks.json
@@ -6,25 +6,27 @@
 // ${fileExtname}: the current opened file's extension
 // ${cwd}: the current working directory of the spawned process

-// A task runner that calls a custom npm script that compiles the extension.
+// A task runner that calls a custom npm script to compile the extension.
 {
-    "version": "0.1.0",
+    "version": "2.0.0",

-    // we want to run npm
+    // Run npm.
     "command": "npm",

-    // the command is a shell script
-    "isShellCommand": true,
+    // This command is a shell script.
+    "type": "shell",

     // show the output window only if unrecognized errors occur.
-    "showOutput": "silent",
+    "presentation": {
+        "reveal": "silent",
+    },

-    // we run the custom script "compile" as defined in package.json
+    // Run the custom "compile" script as defined in package.json.
     "args": ["run", "compile", "--loglevel", "silent"],

-    // The tsc compiler is started in watching mode
-    "isWatching": true,
+    // The tsc compiler is kept alive and runs in the background.
+    "isBackground": true,

-    // use the standard tsc in watch mode problem matcher to find compile problems in the output.
+    // Find compilation problems in the output through tsc in watch mode.
     "problemMatcher": "$tsc-watch"
-}
\ No newline at end of file
+}
diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/DEVELOPING.md b/clang-tools-extra/clangd/clients/clangd-vscode/DEVELOPING.md
index e888aba3ea20..15f2b930329e 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/DEVELOPING.md
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/DEVELOPING.md
@@ -10,20 +10,20 @@ A guide of developing `vscode-clangd` extension.
 ## Steps

 1. Make sure you disable the installed `vscode-clangd` extension in VS Code.
-2. Make sure you have clangd in /usr/bin/clangd or edit src/extension.ts to
+2. Make sure you have clangd in `/usr/bin/clangd` or edit `src/extension.ts` to
    point to the binary.
-3. In order to start a development instance of VS code extended with this, run:
+3. To start a development instance of VS code extended with this, run:

 ```bash
    $ cd /path/to/clang-tools-extra/clangd/clients/clangd-vscode/
    $ npm install
    $ code .
-   # When VS Code starts, press <F5>.
+   # When VSCode starts, press <F5>.
 ```

 # Contributing

-Please follow the exsiting code style when contributing to the extension, we
+Please follow the existing code style when contributing to the extension, we
 recommend to run `npm run format` before sending a patch.

 # Publish to VS Code Marketplace
@@ -38,15 +38,15 @@ to the marketplace.
 * Bump the version in `package.json`, and commit the change to upstream

 The extension is published under `llvm-vs-code-extensions` account, which is
-currently maintained by clangd developers. If you want to make a new release,
-please contact clangd-dev@lists.llvm.org.
+maintained by clangd developers. If you want to make a new release, please
+contact clangd-dev@lists.llvm.org.

 ## Steps

 ```bash
 $ cd /path/to/clang-tools-extra/clangd/clients/clangd-vscode/
-  # For the first time, you need to login in the account. vsce will ask you for
-  the Personal Access Token, and remember it for future commands.
+  # For the first time, you need to log in to the account. vsce will ask you
+  for the Personal Access Token and will remember it for future commands.
 $ vsce login llvm-vs-code-extensions
   # Publish the extension to the VSCode marketplace.
 $ npm run publish
diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts
index 4749cd1bb582..a7570b63e552 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts
@@ -3,7 +3,7 @@ import * as vscodelc from 'vscode-languageclient';
 import * as semanticHighlighting from './semantic-highlighting';

 /**
- * Method to get workspace configuration option
+ * Get an option from workspace configuration.
  * @param option name of the option (e.g. for clangd.path should be path)
  * @param defaultValue default value to return if option is not set
  */
@@ -75,8 +75,8 @@ class EnableEditsNearCursorFeature implements vscodelc.StaticFeature {
 }

 /**
- * this method is called when your extension is activate
- * your extension is activated the very first time the command is executed
+ * This method is called when the extension is activated. The extension is
+ * activated the very first time a command is executed.
  */
 export function activate(context: vscode.ExtensionContext) {
   const syncFileEvents = getConfig<boolean>('syncFileEvents', true);
@@ -97,7 +97,7 @@ export function activate(context: vscode.ExtensionContext) {
     documentSelector: [
       { scheme: 'file', language: 'c' },
       { scheme: 'file', language: 'cpp' },
-      // cuda is not supported by vscode, but our extension does.
+      // CUDA is not supported by vscode, but our extension does support it.
       { scheme: 'file', language: 'cuda' },
       { scheme: 'file', language: 'objective-c'},
       { scheme: 'file', language: 'objective-cpp'}
@@ -106,7 +106,7 @@ export function activate(context: vscode.ExtensionContext) {
       // FIXME: send sync file events when clangd provides implementations.
     },
     initializationOptions: { clangdFileStatus: true },
-    // Do not switch to output window when clangd returns output
+    // Do not switch to output window when clangd returns output.
     revealOutputChannelOn: vscodelc.RevealOutputChannelOn.Never,

     // We hack up the completion items a bit to prevent VSCode from re-ranking them
@@ -126,7 +126,7 @@ export function activate(context: vscode.ExtensionContext) {
       provideCompletionItem: async (document, position, context, token, next) => {
         let list = await next(document, position, context, token);
         let items = (Array.isArray(list) ? list : list.items).map(item => {
-          // Gets the prefix used by vscode when doing fuzzymatch.
+          // Gets the prefix used by VSCode when doing fuzzymatch.
           let prefix = document.getText(new vscode.Range(item.range.start, position))
           if (prefix)
             item.filterText = prefix + "_" + item.filterText;
diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/tsconfig.json b/clang-tools-extra/clangd/clients/clangd-vscode/tsconfig.json
index 0b05f3090920..71a62c71da02 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/tsconfig.json
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/tsconfig.json
@@ -26,4 +26,4 @@
         "node_modules",
         ".vscode-test"
     ]
-}
\ No newline at end of file
+}
diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp
index 7a7bb9b0718e..9bfc58b55f71 100644
--- a/clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -281,6 +281,15 @@ opt<bool> CrossFileRename{
     Hidden,
 };

+opt<bool> RecoveryAST{
+    "recovery-ast",
+    cat(Features),
+    desc("Preserve expressions in AST for broken code (C++ only). Note that "
+         "this feature is experimental and may lead to crashes"),
+    init(false),
+    Hidden,
+};
+
 opt<unsigned> WorkerThreadsCount{
     "j",
     cat(Misc),
@@ -629,6 +638,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var
   }
   Opts.StaticIndex = StaticIdx.get();
   Opts.AsyncThreadsCount = WorkerThreadsCount;
+  Opts.BuildRecoveryAST = RecoveryAST;

   clangd::CodeCompleteOptions CCOpts;
   CCOpts.IncludeIneligibleResults = IncludeIneligibleResults;
diff --git a/clang-tools-extra/clangd/unittests/ClangdTests.cpp b/clang-tools-extra/clangd/unittests/ClangdTests.cpp
index 1e5fcf3d97e1..d15eba80ae29 100644
--- a/clang-tools-extra/clangd/unittests/ClangdTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ClangdTests.cpp
@@ -552,15 +552,13 @@ TEST_F(ClangdVFSTest, InvalidCompileCommand) {
   EXPECT_ERROR(runFindDocumentHighlights(Server, FooCpp, Position()));
   EXPECT_ERROR(runRename(Server, FooCpp, Position(), "new_name",
                          clangd::RenameOptions()));
+  EXPECT_ERROR(runSignatureHelp(Server, FooCpp, Position()));
   // Identifier-based fallback completion.
   EXPECT_THAT(cantFail(runCodeComplete(Server, FooCpp, Position(),
                                        clangd::CodeCompleteOptions()))
                   .Completions,
               ElementsAre(Field(&CodeCompletion::Name, "int"),
                           Field(&CodeCompletion::Name, "main")));
-  auto SigHelp = runSignatureHelp(Server, FooCpp, Position());
-  ASSERT_TRUE(bool(SigHelp)) << "signatureHelp returned an error";
-  EXPECT_THAT(SigHelp->signatures, IsEmpty());
 }

 class ClangdThreadingTest : public ClangdVFSTest {};
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 24197485f68a..1084b1550579 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -93,8 +93,9 @@ std::unique_ptr<SymbolIndex> memIndex(std::vector<Symbol> Symbols) {
   return MemIndex::build(std::move(Slab).build(), RefSlab(), RelationSlab());
 }

-CodeCompleteResult completions(ClangdServer &Server, llvm::StringRef TestCode,
-                               Position Point,
+// Runs code completion.
+// If IndexSymbols is non-empty, an index will be built and passed to opts.
+CodeCompleteResult completions(const TestTU &TU, Position Point,
                                std::vector<Symbol> IndexSymbols = {},
                                clangd::CodeCompleteOptions Opts = {}) {
   std::unique_ptr<SymbolIndex> OverrideIndex;
@@ -104,49 +105,34 @@ CodeCompleteResult completions(ClangdServer &Server, llvm::StringRef TestCode,
                                Position Point,
     OverrideIndex = memIndex(std::move(IndexSymbols));
     Opts.Index = OverrideIndex.get();
   }

-  auto File = testPath("foo.cpp");
-  runAddDocument(Server, File, TestCode);
-  auto CompletionList =
-      llvm::cantFail(runCodeComplete(Server, File, Point, Opts));
-  return CompletionList;
-}
-
-CodeCompleteResult completions(ClangdServer &Server, llvm::StringRef Text,
-                               std::vector<Symbol> IndexSymbols = {},
-                               clangd::CodeCompleteOptions Opts = {},
-                               PathRef FilePath = "foo.cpp") {
-  std::unique_ptr<SymbolIndex> OverrideIndex;
-  if (!IndexSymbols.empty()) {
-    assert(!Opts.Index && "both Index and IndexSymbols given!");
-    OverrideIndex = memIndex(std::move(IndexSymbols));
-    Opts.Index = OverrideIndex.get();
+  auto Inputs = TU.inputs();
+  IgnoreDiagnostics Diags;
+  auto CI = buildCompilerInvocation(Inputs, Diags);
+  if (!CI) {
+    ADD_FAILURE() << "Couldn't build CompilerInvocation";
+    return {};
   }
-
-  auto File = testPath(FilePath);
-  Annotations Test(Text);
-  runAddDocument(Server, File, Test.code());
-  auto CompletionList =
-      llvm::cantFail(runCodeComplete(Server, File, Test.point(), Opts));
-  return CompletionList;
+  auto Preamble =
+      buildPreamble(testPath(TU.Filename), *CI, /*OldPreamble=*/nullptr, Inputs,
+                    /*InMemory=*/true, /*Callback=*/nullptr);
+  return codeComplete(testPath(TU.Filename), Inputs.CompileCommand,
+                      Preamble.get(), TU.Code, Point, Inputs.FS, Opts);
 }

-// Builds a server and runs code completion.
-// If IndexSymbols is non-empty, an index will be built and passed to opts.
+// Runs code completion.
 CodeCompleteResult completions(llvm::StringRef Text,
                                std::vector<Symbol> IndexSymbols = {},
                                clangd::CodeCompleteOptions Opts = {},
                                PathRef FilePath = "foo.cpp") {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
+  Annotations Test(Text);
+  auto TU = TestTU::withCode(Test.code());
   // To make sure our tests for completiopns inside templates work on Windows.
-  CDB.ExtraClangFlags = {"-fno-delayed-template-parsing"};
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
-  return completions(Server, Text, std::move(IndexSymbols), std::move(Opts),
-                     FilePath);
+  TU.Filename = FilePath.str();
+  return completions(TU, Test.point(), std::move(IndexSymbols),
+                     std::move(Opts));
 }

-// Builds a server and runs code completion.
-// If IndexSymbols is non-empty, an index will be built and passed to opts.
+// Runs code completion without the clang parser.
 CodeCompleteResult completionsNoCompile(llvm::StringRef Text,
                                         std::vector<Symbol> IndexSymbols = {},
                                         clangd::CodeCompleteOptions Opts = {},
@@ -669,53 +655,38 @@ TEST(CompletionTest, SemaIndexMergeWithLimit) {
 }

 TEST(CompletionTest, IncludeInsertionPreprocessorIntegrationTests) {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-  std::string Subdir = testPath("sub");
-  std::string SearchDirArg = (Twine("-I") + Subdir).str();
-  CDB.ExtraClangFlags = {SearchDirArg.c_str()};
-  std::string BarHeader = testPath("sub/bar.h");
-  FS.Files[BarHeader] = "";
+  TestTU TU;
+  TU.ExtraArgs.push_back("-I" + testPath("sub"));
+  TU.AdditionalFiles["sub/bar.h"] = "";
+  auto BarURI = URI::create(testPath("sub/bar.h")).toString();

-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
-  auto BarURI = URI::create(BarHeader).toString();
   Symbol Sym = cls("ns::X");
   Sym.CanonicalDeclaration.FileURI = BarURI.c_str();
   Sym.IncludeHeaders.emplace_back(BarURI, 1);
   // Shoten include path based on search directory and insert.
-  auto Results = completions(Server,
-                             R"cpp(
-          int main() { ns::^ }
-      )cpp",
-                             {Sym});
+  Annotations Test("int main() { ns::^ }");
+  TU.Code = Test.code().str();
+  auto Results = completions(TU, Test.point(), {Sym});
   EXPECT_THAT(Results.Completions,
               ElementsAre(AllOf(Named("X"), InsertInclude("\"bar.h\""))));
   // Can be disabled via option.
   CodeCompleteOptions NoInsertion;
   NoInsertion.InsertIncludes = CodeCompleteOptions::NeverInsert;
-  Results = completions(Server,
-                        R"cpp(
-          int main() { ns::^ }
-      )cpp",
-                        {Sym}, NoInsertion);
+  Results = completions(TU, Test.point(), {Sym}, NoInsertion);
   EXPECT_THAT(Results.Completions,
               ElementsAre(AllOf(Named("X"), Not(InsertInclude()))));
   // Duplicate based on inclusions in preamble.
-  Results = completions(Server,
-                        R"cpp(
+  Test = Annotations(R"cpp(
           #include "sub/bar.h"  // not shortest, so should only match resolved.
           int main() { ns::^ }
-      )cpp",
-                        {Sym});
+      )cpp");
+  TU.Code = Test.code().str();
+  Results = completions(TU, Test.point(), {Sym});
   EXPECT_THAT(Results.Completions, ElementsAre(AllOf(Named("X"), Labeled("X"),
                                                      Not(InsertInclude()))));
 }

 TEST(CompletionTest, NoIncludeInsertionWhenDeclFoundInFile) {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
   Symbol SymX = cls("ns::X");
   Symbol SymY = cls("ns::Y");
   std::string BarHeader = testPath("bar.h");
@@ -725,8 +696,7 @@ TEST(CompletionTest, NoIncludeInsertionWhenDeclFoundInFile) {
   SymX.IncludeHeaders.emplace_back("<bar>", 1);
   SymY.IncludeHeaders.emplace_back("<bar>", 1);
   // Shoten include path based on search directory and insert.
-  auto Results = completions(Server,
-                             R"cpp(
+  auto Results = completions(R"cpp(
       namespace ns {
         class X;
         class Y {};
@@ -740,34 +710,27 @@ TEST(CompletionTest, NoIncludeInsertionWhenDeclFoundInFile) {
 }

 TEST(CompletionTest, IndexSuppressesPreambleCompletions) {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
-
-  FS.Files[testPath("bar.h")] =
-      R"cpp(namespace ns { struct preamble { int member; }; })cpp";
-  auto File = testPath("foo.cpp");
   Annotations Test(R"cpp(
       #include "bar.h"
       namespace ns { int local; }
       void f() { ns::^; }
       void f2() { ns::preamble().$2^; }
   )cpp");
-  runAddDocument(Server, File, Test.code());
-  clangd::CodeCompleteOptions Opts = {};
+  auto TU = TestTU::withCode(Test.code());
+  TU.AdditionalFiles["bar.h"] =
+      R"cpp(namespace ns { struct preamble { int member; }; })cpp";

+  clangd::CodeCompleteOptions Opts = {};
   auto I = memIndex({var("ns::index")});
   Opts.Index = I.get();
-  auto WithIndex = cantFail(runCodeComplete(Server, File, Test.point(), Opts));
+  auto WithIndex = completions(TU, Test.point(), {}, Opts);
   EXPECT_THAT(WithIndex.Completions,
               UnorderedElementsAre(Named("local"), Named("index")));
-  auto ClassFromPreamble =
-      cantFail(runCodeComplete(Server, File, Test.point("2"), Opts));
+  auto ClassFromPreamble = completions(TU, Test.point("2"), {}, Opts);
   EXPECT_THAT(ClassFromPreamble.Completions, Contains(Named("member")));

   Opts.Index = nullptr;
-  auto WithoutIndex =
-      cantFail(runCodeComplete(Server, File, Test.point(), Opts));
+  auto WithoutIndex = completions(TU, Test.point(), {}, Opts);
   EXPECT_THAT(WithoutIndex.Completions,
               UnorderedElementsAre(Named("local"), Named("preamble")));
 }

@@ -811,7 +774,14 @@ TEST(CompletionTest, DynamicIndexIncludeInsertion) {
   Server.addDocument(testPath("foo_impl.cpp"), FileContent);
   // Wait for the dynamic index being built.
   ASSERT_TRUE(Server.blockUntilIdleForTest());
-  EXPECT_THAT(completions(Server, "Foo^ foo;").Completions,
+
+  auto File = testPath("foo.cpp");
+  Annotations Test("Foo^ foo;");
+  runAddDocument(Server, File, Test.code());
+  auto CompletionList =
+      llvm::cantFail(runCodeComplete(Server, File, Test.point(), {}));
+
+  EXPECT_THAT(CompletionList.Completions,
               ElementsAre(AllOf(Named("Foo"), HasInclude("\"foo_header.h\""),
                                 InsertInclude())));
 }
@@ -892,13 +862,17 @@ TEST(CompletionTest, CommentsFromSystemHeaders) {
     int foo();
   )cpp";

-  auto Results = completions(Server,
-                             R"cpp(
+  auto File = testPath("foo.cpp");
+  Annotations Test(R"cpp(
         #include "foo.h"
         int x = foo^
      )cpp");
+  runAddDocument(Server, File, Test.code());
+  auto CompletionList =
+      llvm::cantFail(runCodeComplete(Server, File, Test.point(), {}));
+
   EXPECT_THAT(
-      Results.Completions,
+      CompletionList.Completions,
       Contains(AllOf(Named("foo"), Doc("This comment should be retained!"))));
 }

@@ -1064,15 +1038,23 @@ SignatureHelp signatures(llvm::StringRef Text, Position Point,
   if (!IndexSymbols.empty())
     Index = memIndex(IndexSymbols);

-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-  ClangdServer::Options Opts = ClangdServer::optsForTest();
-  Opts.StaticIndex = Index.get();
-
-  ClangdServer Server(CDB, FS, Opts);
-  auto File = testPath("foo.cpp");
-  runAddDocument(Server, File, Text);
-  return llvm::cantFail(runSignatureHelp(Server, File, Point));
+  auto TU = TestTU::withCode(Text);
+  auto Inputs = TU.inputs();
+  IgnoreDiagnostics Diags;
+  auto CI = buildCompilerInvocation(Inputs, Diags);
+  if (!CI) {
+    ADD_FAILURE() << "Couldn't build CompilerInvocation";
+    return {};
+  }
+  auto Preamble =
+      buildPreamble(testPath(TU.Filename), *CI, /*OldPreamble=*/nullptr, Inputs,
+                    /*InMemory=*/true, /*Callback=*/nullptr);
+  if (!Preamble) {
+    ADD_FAILURE() << "Couldn't build Preamble";
+    return {};
+  }
+  return signatureHelp(testPath(TU.Filename), Inputs.CompileCommand, *Preamble,
+                       Text, Point, Inputs.FS, Index.get());
 }

 SignatureHelp signatures(llvm::StringRef Text,
@@ -1546,14 +1528,7 @@ TEST(CompletionTest, DocumentationFromChangedFileCrash) {
 }

 TEST(CompletionTest, NonDocComments) {
-  MockFSProvider FS;
-  auto FooCpp = testPath("foo.cpp");
-  FS.Files[FooCpp] = "";
-
-  MockCompilationDatabase CDB;
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
-
-  Annotations Source(R"cpp(
+  const char *Text = R"cpp(
     // We ignore namespace comments, for rationale see CodeCompletionStrings.h.
     namespace comments_ns {
    }
@@ -1588,17 +1563,11 @@ TEST(CompletionTest, NonDocComments) {
    int Struct::comments_quux() {
      int a = comments^;
    }
-  )cpp");
-  // FIXME: Auto-completion in a template requires disabling delayed template
-  // parsing.
-  CDB.ExtraClangFlags.push_back("-fno-delayed-template-parsing");
-  runAddDocument(Server, FooCpp, Source.code(), "null", WantDiagnostics::Yes);
-  CodeCompleteResult Completions = cantFail(runCodeComplete(
-      Server, FooCpp, Source.point(), clangd::CodeCompleteOptions()));
+  )cpp";

   // We should not get any of those comments in completion.
   EXPECT_THAT(
-      Completions.Completions,
+      completions(Text).Completions,
       UnorderedElementsAre(AllOf(Not(IsDocumented()), Named("comments_foo")),
                            AllOf(IsDocumented(), Named("comments_baz")),
                            AllOf(IsDocumented(), Named("comments_quux")),
@@ -1740,11 +1709,10 @@ TEST(CompletionTest, CodeCompletionContext) {
 TEST(CompletionTest, FixItForArrowToDot) {
   MockFSProvider FS;
   MockCompilationDatabase CDB;
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());

   CodeCompleteOptions Opts;
   Opts.IncludeFixIts = true;
-  Annotations TestCode(
+  const char* Code =
       R"cpp(
         class Auxilary {
          public:
@@ -1760,13 +1728,12 @@ TEST(CompletionTest, FixItForArrowToDot) {
           ClassWithPtr x;
           x[[->]]^;
         }
-      )cpp");
-  auto Results =
-      completions(Server, TestCode.code(), TestCode.point(), {}, Opts);
+      )cpp";
+  auto Results = completions(Code, {}, Opts);
   EXPECT_EQ(Results.Completions.size(), 3u);

   TextEdit ReplacementEdit;
-  ReplacementEdit.range = TestCode.range();
+  ReplacementEdit.range = Annotations(Code).range();
   ReplacementEdit.newText = ".";
   for (const auto &C : Results.Completions) {
     EXPECT_TRUE(C.FixIts.size() == 1u || C.Name == "AuxFunction");
@@ -1777,13 +1744,9 @@
 }

 TEST(CompletionTest, FixItForDotToArrow) {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
-
   CodeCompleteOptions Opts;
   Opts.IncludeFixIts = true;
-  Annotations TestCode(
+  const char* Code =
       R"cpp(
         class Auxilary {
          public:
@@ -1799,13 +1762,12 @@ TEST(CompletionTest, FixItForDotToArrow) {
           ClassWithPtr x;
           x[[.]]^;
         }
-      )cpp");
-  auto Results =
-      completions(Server, TestCode.code(), TestCode.point(), {}, Opts);
+      )cpp";
+  auto Results = completions(Code, {}, Opts);
   EXPECT_EQ(Results.Completions.size(), 3u);

   TextEdit ReplacementEdit;
-  ReplacementEdit.range = TestCode.range();
+  ReplacementEdit.range = Annotations(Code).range();
   ReplacementEdit.newText = "->";
   for (const auto &C : Results.Completions) {
     EXPECT_TRUE(C.FixIts.empty() || C.Name == "AuxFunction");
@@ -1858,8 +1820,8 @@ TEST(CompletionTest, RenderWithFixItNonMerged) {
 TEST(CompletionTest, CompletionTokenRange) {
   MockFSProvider FS;
   MockCompilationDatabase CDB;
-  FS.Files["foo/abc/foo.h"] = "";
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
+  TestTU TU;
+  TU.AdditionalFiles["foo/abc/foo.h"] = "";

   constexpr const char *TestCodes[] = {
       R"cpp(
@@ -1891,10 +1853,10 @@ TEST(CompletionTest, CompletionTokenRange) {
   };
   for (const auto &Text : TestCodes) {
     Annotations TestCode(Text);
-    auto Results = completions(Server, TestCode.code(), TestCode.point());
-
+    TU.Code = TestCode.code().str();
+    auto Results = completions(TU, TestCode.point());
     if (Results.Completions.size() != 1) {
-      ADD_FAILURE() << "Results.Completions.size() != 1";
+      ADD_FAILURE() << "Results.Completions.size() != 1" << Text;
       continue;
     }
     EXPECT_THAT(Results.Completions.front().CompletionTokenRange,
@@ -2247,13 +2209,12 @@ TEST(CompletionTest, InsertTheMostPopularHeader) {
 }

 TEST(CompletionTest, NoInsertIncludeIfOnePresent) {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-
-  std::string FooHeader = testPath("foo.h");
-  FS.Files[FooHeader] = "";
-
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
+  Annotations Test(R"cpp(
+    #include "foo.h"
+    Fun^
+  )cpp");
+  auto TU = TestTU::withCode(Test.code());
+  TU.AdditionalFiles["foo.h"] = "";

   std::string DeclFile = URI::create(testPath("foo")).toString();
   Symbol Sym = func("Func");
@@ -2262,7 +2223,7 @@ TEST(CompletionTest, NoInsertIncludeIfOnePresent) {
   Sym.IncludeHeaders.emplace_back("\"bar.h\"", 1000);

   EXPECT_THAT(
-      completions(Server, "#include \"foo.h\"\nFun^", {Sym}).Completions,
+      completions(TU, Test.point(), {Sym}).Completions,
       UnorderedElementsAre(
           AllOf(Named("Func"), HasInclude("\"foo.h\""), Not(InsertInclude()))));
 }
@@ -2279,20 +2240,15 @@ TEST(CompletionTest, MergeMacrosFromIndexAndSema) {
 }

 TEST(CompletionTest, MacroFromPreamble) {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-  std::string FooHeader = testPath("foo.h");
-  FS.Files[FooHeader] = "#define CLANGD_PREAMBLE_HEADER x\n";
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
-  auto Results = completions(
-      R"cpp(#include "foo.h"
-          #define CLANGD_PREAMBLE_MAIN x
+  Annotations Test(R"cpp(#define CLANGD_PREAMBLE_MAIN x
           int x = 0;
           #define CLANGD_MAIN x
           void f() { CLANGD_^ }
-      )cpp",
-      {func("CLANGD_INDEX")});
+      )cpp");
+  auto TU = TestTU::withCode(Test.code());
+  TU.HeaderCode = "#define CLANGD_PREAMBLE_HEADER x";
+  auto Results = completions(TU, Test.point(), {func("CLANGD_INDEX")});
   // We should get results from the main file, including the preamble section.
   // However no results from included files (the index should cover them).
   EXPECT_THAT(Results.Completions,
@@ -2405,29 +2361,22 @@ TEST(SignatureHelpTest, ConstructorInitializeFields) {
 }

 TEST(CompletionTest, IncludedCompletionKinds) {
-  MockFSProvider FS;
-  MockCompilationDatabase CDB;
-  std::string Subdir = testPath("sub");
-  std::string SearchDirArg = (Twine("-I") + Subdir).str();
-  CDB.ExtraClangFlags = {SearchDirArg.c_str()};
-  std::string BarHeader = testPath("sub/bar.h");
-  FS.Files[BarHeader] = "";
-  ClangdServer Server(CDB, FS, ClangdServer::optsForTest());
-  auto Results = completions(Server,
-                             R"cpp(
-        #include "^"
-    )cpp");
+  Annotations Test(R"cpp(#include "^")cpp");
+  auto TU = TestTU::withCode(Test.code());
+  TU.AdditionalFiles["sub/bar.h"] = "";
+  TU.ExtraArgs.push_back("-I" + testPath("sub"));
+
+  auto Results = completions(TU, Test.point());
   EXPECT_THAT(Results.Completions,
               AllOf(Has("sub/", CompletionItemKind::Folder),
                     Has("bar.h\"", CompletionItemKind::File)));
 }

 TEST(CompletionTest, NoCrashAtNonAlphaIncludeHeader) {
-  auto Results = completions(
+  completions(
       R"cpp(
         #include "./^"
     )cpp");
-  EXPECT_TRUE(Results.Completions.empty());
 }

 TEST(CompletionTest, NoAllScopesCompletionWhenQualified) {
@@ -2714,6 +2663,20 @@ TEST(CompletionTest, NoCrashWithIncompleteLambda) {
   EXPECT_THAT(Signatures, Contains(Sig("x() -> auto")));
 }

+TEST(CompletionTest, DelayedTemplateParsing) {
+  Annotations Test(R"cpp(
+    int xxx;
+    template <typename T> int foo() { return xx^; }
+  )cpp");
+  auto TU = TestTU::withCode(Test.code());
+  // Even though delayed-template-parsing is on, we will disable it to provide
+  // completion in templates.
+  TU.ExtraArgs.push_back("-fdelayed-template-parsing");
+
+  EXPECT_THAT(completions(TU, Test.point()).Completions,
+              Contains(Named("xxx")));
+}
+
 TEST(CompletionTest, CompletionRange) {
   const char *WithRange = "auto x = [[abc]]^";
   auto Completions = completions(WithRange);
diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
index c38ccc3f9441..7b6fff292e66 100644
--- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
@@ -65,7 +65,7 @@ class TargetDeclTest : public ::testing::Test {
 protected:
   using Rel = DeclRelation;
   std::string Code;
-  std::vector<const char *> Flags;
+  std::vector<std::string> Flags;

   // Asserts that `Code` has a marked selection of a node `NodeType`,
   // and returns allTargetDecls() as PrintedDecl structs.
@@ -132,6 +132,16 @@ TEST_F(TargetDeclTest, Exprs) {
   EXPECT_DECLS("CXXOperatorCallExpr", "void operator()(int n)");
 }

+TEST_F(TargetDeclTest, Recovery) {
+  Code = R"cpp(
+    // error-ok: testing behavior on broken code
+    int f();
+    int f(int, int);
+    int x = [[f]](42);
+  )cpp";
+  EXPECT_DECLS("UnresolvedLookupExpr", "int f()", "int f(int, int)");
+}
+
 TEST_F(TargetDeclTest, UsingDecl) {
   Code = R"cpp(
     namespace foo {
@@ -685,6 +695,15 @@ TEST_F(FindExplicitReferencesTest, All) {
        )cpp",
        "0: targets = {x}\n"
        "1: targets = {X::a}\n"},
+      {R"cpp(
+       // error-ok: testing with broken code
+       int bar();
+       int foo() {
+         return $0^bar() + $1^bar(42);
+       }
+       )cpp",
+       "0: targets = {bar}\n"
+       "1: targets = {bar}\n"},
       // Namespaces and aliases.
       {R"cpp(
           namespace ns {}
diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp
index 909c125aed2e..2adcfc338cc2 100644
--- a/clang-tools-extra/clangd/unittests/TestTU.cpp
+++ b/clang-tools-extra/clangd/unittests/TestTU.cpp
@@ -20,7 +20,7 @@
 namespace clang {
 namespace clangd {

-ParsedAST TestTU::build() const {
+ParseInputs TestTU::inputs() const {
   std::string FullFilename = testPath(Filename),
               FullHeaderName = testPath(HeaderFilename),
               ImportThunk = testPath("import_thunk.h");
@@ -34,43 +34,48 @@ ParsedAST TestTU::build() const {
   Files[FullHeaderName] = HeaderCode;
   Files[ImportThunk] = ThunkContents;

-  std::vector<const char *> Cmd = {"clang"};
+  ParseInputs Inputs;
+  auto& Argv = Inputs.CompileCommand.CommandLine;
+  Argv = {"clang"};
   // FIXME: this shouldn't need to be conditional, but it breaks a
   // GoToDefinition test for some reason (getMacroArgExpandedLocation fails).
   if (!HeaderCode.empty()) {
-    Cmd.push_back("-include");
-    Cmd.push_back(ImplicitHeaderGuard ? ImportThunk.c_str()
-                                      : FullHeaderName.c_str());
+    Argv.push_back("-include");
+    Argv.push_back(ImplicitHeaderGuard ? ImportThunk : FullHeaderName);
     // ms-compatibility changes the meaning of #import.
     // The default is OS-dependent (on on windows), ensure it's off.
     if (ImplicitHeaderGuard)
-      Cmd.push_back("-fno-ms-compatibility");
+      Inputs.CompileCommand.CommandLine.push_back("-fno-ms-compatibility");
   }
-  Cmd.insert(Cmd.end(), ExtraArgs.begin(), ExtraArgs.end());
+  Argv.insert(Argv.end(), ExtraArgs.begin(), ExtraArgs.end());
   // Put the file name at the end -- this allows the extra arg (-xc++) to
   // override the language setting.
-  Cmd.push_back(FullFilename.c_str());
-  ParseInputs Inputs;
+  Argv.push_back(FullFilename);
   Inputs.CompileCommand.Filename = FullFilename;
-  Inputs.CompileCommand.CommandLine = {Cmd.begin(), Cmd.end()};
   Inputs.CompileCommand.Directory = testRoot();
   Inputs.Contents = Code;
   Inputs.FS = buildTestFS(Files);
   Inputs.Opts = ParseOptions();
+  Inputs.Opts.BuildRecoveryAST = true;
   Inputs.Opts.ClangTidyOpts.Checks = ClangTidyChecks;
   Inputs.Opts.ClangTidyOpts.WarningsAsErrors = ClangTidyWarningsAsErrors;
   Inputs.Index = ExternalIndex;
   if (Inputs.Index)
     Inputs.Opts.SuggestMissingIncludes = true;
+  return Inputs;
+}
+
+ParsedAST TestTU::build() const {
+  auto Inputs = inputs();
   StoreDiags Diags;
   auto CI = buildCompilerInvocation(Inputs, Diags);
   assert(CI && "Failed to build compilation invocation.");
   auto Preamble =
-      buildPreamble(FullFilename, *CI,
+      buildPreamble(testPath(Filename), *CI,
                     /*OldPreamble=*/nullptr, Inputs,
                     /*StoreInMemory=*/true, /*PreambleCallback=*/nullptr);
-  auto AST =
-      buildAST(FullFilename, std::move(CI), Diags.take(), Inputs, Preamble);
+  auto AST = buildAST(testPath(Filename), std::move(CI), Diags.take(), Inputs,
+                      Preamble);
   if (!AST.hasValue()) {
     ADD_FAILURE() << "Failed to build code:\n" << Code;
     llvm_unreachable("Failed to build TestTU!");
@@ -79,9 +84,17 @@ ParsedAST TestTU::build() const {
   // This guards against accidental syntax errors silently subverting tests.
   // error-ok is awfully primitive - using clang -verify would be nicer.
   // Ownership and layering makes it pretty hard.
-  if (llvm::none_of(Files, [](const auto &KV) {
-        return llvm::StringRef(KV.second).contains("error-ok");
-      })) {
+  bool ErrorOk = [&, this] {
+    llvm::StringLiteral Marker = "error-ok";
+    if (llvm::StringRef(Code).contains(Marker) ||
+        llvm::StringRef(HeaderCode).contains(Marker))
+      return true;
+    for (const auto& KV : this->AdditionalFiles)
+      if (llvm::StringRef(KV.second).contains(Marker))
+        return true;
+    return false;
+  }();
+  if (!ErrorOk) {
     for (const auto &D : AST->getDiagnostics())
       if (D.Severity >= DiagnosticsEngine::Error) {
         ADD_FAILURE()
diff --git a/clang-tools-extra/clangd/unittests/TestTU.h b/clang-tools-extra/clangd/unittests/TestTU.h
index 4668543d5b4d..229f65a4b95c 100644
--- a/clang-tools-extra/clangd/unittests/TestTU.h
+++ b/clang-tools-extra/clangd/unittests/TestTU.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H
 #define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H

+#include "Compiler.h"
 #include "ParsedAST.h"
 #include "Path.h"
 #include "index/Index.h"
@@ -54,7 +55,7 @@ struct TestTU {
   llvm::StringMap<std::string> AdditionalFiles;

   // Extra arguments for the compiler invocation.
-  std::vector<const char *> ExtraArgs;
+  std::vector<std::string> ExtraArgs;

   llvm::Optional<std::string> ClangTidyChecks;
   llvm::Optional<std::string> ClangTidyWarningsAsErrors;
@@ -67,6 +68,7 @@ struct TestTU {
   // By default, build() will report Error diagnostics as GTest errors.
   // Suppress this behavior by adding an 'error-ok' comment to the code.
   ParsedAST build() const;
+  ParseInputs inputs() const;
   SymbolSlab headerSymbols() const;
   std::unique_ptr<SymbolIndex> index() const;
 };
diff --git a/clang-tools-extra/clangd/unittests/TweakTesting.h b/clang-tools-extra/clangd/unittests/TweakTesting.h
index 10186f859bae..c771149a72fc 100644
--- a/clang-tools-extra/clangd/unittests/TweakTesting.h
+++ b/clang-tools-extra/clangd/unittests/TweakTesting.h
@@ -66,7 +66,7 @@ class TweakTest : public ::testing::Test {
   llvm::StringRef FileName = "TestTU.cpp";

   // Extra flags passed to the compilation in apply().
-  std::vector<const char *> ExtraArgs;
+  std::vector<std::string> ExtraArgs;

   // Context in which snippets of code should be placed to run tweaks.
   CodeContext Context = File;
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst
index 891f6be63714..c2f05cf589ea 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst
@@ -10,5 +10,5 @@ After applying modifications as suggested by the check, runnnig the check
 again might find more opportunities to mark member functions ``static``.

 After making a member function ``static``, you might want to run the check
-`readability-static-accessed-through-instance` to replace calls like
+`readability-static-accessed-through-instance <readability-static-accessed-through-instance.html>`_ to replace calls like
 ``Instance.method()`` by ``Class::method()``.
diff --git a/clang/cmake/caches/CrossWinToARMLinux.cmake b/clang/cmake/caches/CrossWinToARMLinux.cmake
index 0d359a1609a5..3d1e961ada8d 100644
--- a/clang/cmake/caches/CrossWinToARMLinux.cmake
+++ b/clang/cmake/caches/CrossWinToARMLinux.cmake
@@ -86,6 +86,8 @@ set(LIBCXXABI_TARGET_TRIPLE "${CMAKE_C_COMPILER_TARGET}" CACHE S
 set(LIBCXXABI_SYSROOT                "${DEFAULT_SYSROOT}" CACHE STRING "")
 set(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI OFF CACHE BOOL "")
 set(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX OFF CACHE BOOL "")
+set(LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXXABI OFF CACHE BOOL "")
+set(LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXX OFF CACHE BOOL "")
 set(LIBCXX_USE_COMPILER_RT           ON CACHE BOOL "")
 set(LIBCXX_TARGET_TRIPLE             "${CMAKE_C_COMPILER_TARGET}" CACHE STRING "")
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 209a77440537..7fef9e867885 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -229,7 +229,7 @@ implementation.
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
 | base language                | lambda support                                               | :good:`done`             |                                                                       |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
-| misc extension               | array shaping                                                | :part:`worked on`        | D74144                                                                |
+| misc extension               | array shaping                                                | :good:`done`             | D74144                                                                |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
 | misc extension               | library shutdown (omp_pause_resource[_all])                  | :none:`unclaimed parts`  | D55078                                                                |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 365951a360bb..41f1086a410f 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -140,7 +140,8 @@ def NoEscapeDocs : Documentation {
 the compiler that the pointer cannot escape: that is, no reference to the object
 the pointer points to that is derived from the parameter value will survive
 after the function returns.
 Users are responsible for making sure parameters
-annotated with ``noescape`` do not actuallly escape.
+annotated with ``noescape`` do not actually escape. Calling ``free()`` on such
+a parameter does not constitute an escape.

 For example:
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index b9e16e695a39..1b2073b050f4 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5987,7 +5987,7 @@ def err_func_def_incomplete_result : Error<
 def err_atomic_specifier_bad_type : Error<
   "_Atomic cannot be applied to "
   "%select{incomplete |array |function |reference |atomic |qualified |sizeless |}0type "
-  "%1 %select{||||||which is not trivially copyable}0">;
+  "%1 %select{|||||||which is not trivially copyable}0">;

 // Expressions.
 def select_unary_expr_or_type_trait_kind : TextSubstitution<
@@ -10244,7 +10244,8 @@ def warn_nested_declare_variant
     "nested context ignored">,
     InGroup<SourceUsesOpenMP>;
 def err_omp_non_pointer_type_array_shaping_base : Error<
-    "expected pointer type expression as a base of an array shaping operation">;
+    "expected expression with a pointer to a complete type as a base of an array "
+    "shaping operation">;
 } // end of OpenMP category

 let CategoryName = "Related Result Type Issue" in {
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 9c2bc155cd4f..c47eb4587a57 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -662,7 +662,7 @@ class ASTSourceDescriptor {
   StringRef Path;
   StringRef ASTFile;
   ASTFileSignature Signature;
-  const Module *ClangModule = nullptr;
+  Module *ClangModule = nullptr;

 public:
   ASTSourceDescriptor() = default;
@@ -670,13 +670,13 @@ class ASTSourceDescriptor {
                       ASTFileSignature Signature)
       : PCHModuleName(std::move(Name)), Path(std::move(Path)),
         ASTFile(std::move(ASTFile)), Signature(Signature) {}
-  ASTSourceDescriptor(const Module &M);
+  ASTSourceDescriptor(Module &M);

   std::string getModuleName() const;
   StringRef getPath() const { return Path; }
   StringRef getASTFile() const { return ASTFile; }
   ASTFileSignature getSignature() const { return Signature; }
-  const Module *getModuleOrNull() const { return ClangModule; }
+  Module *getModuleOrNull() const { return ClangModule; }
 };
diff --git a/clang/include/clang/CodeGen/CodeGenABITypes.h b/clang/include/clang/CodeGen/CodeGenABITypes.h
index 31f0cea57232..5f4af7fd2a36 100644
--- a/clang/include/clang/CodeGen/CodeGenABITypes.h
+++ b/clang/include/clang/CodeGen/CodeGenABITypes.h
@@ -28,11 +28,12 @@
 #include "clang/CodeGen/CGFunctionInfo.h"

 namespace llvm {
-  class DataLayout;
-  class Module;
-  class Function;
-  class FunctionType;
-  class Type;
+class Constant;
+class DataLayout;
+class Module;
+class Function;
+class FunctionType;
+class Type;
 }

 namespace clang {
@@ -44,6 +45,7 @@ class CoverageSourceInfo;
 class DiagnosticsEngine;
 class HeaderSearchOptions;
 class ObjCMethodDecl;
+class ObjCProtocolDecl;
 class PreprocessorOptions;

 namespace CodeGen {
@@ -137,6 +139,13 @@ llvm::Function *getNonTrivialCStructDestructor(CodeGenModule &CGM,
                                                CharUnits DstAlignment,
                                                bool IsVolatile, QualType QT);

+/// Get a pointer to a protocol object for the given declaration, emitting it if
+/// it hasn't already been emitted in this translation unit. Note that the ABI
+/// for emitting a protocol reference in code (e.g. for a protocol expression)
+/// in most runtimes is not as simple as just materializing a pointer to this
+/// object.
+llvm::Constant *emitObjCProtocolObject(CodeGenModule &CGM,
+                                       const ObjCProtocolDecl *p);
 }  // end namespace CodeGen
 }  // end namespace clang
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 2224c152f626..218404e26409 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -282,8 +282,6 @@ def no_struct_path_tbaa : Flag<["-"], "no-struct-path-tbaa">,
   HelpText<"Turn off struct-path aware Type Based Alias Analysis">;
 def new_struct_path_tbaa : Flag<["-"], "new-struct-path-tbaa">,
   HelpText<"Enable enhanced struct-path aware Type Based Alias Analysis">;
-def masm_verbose : Flag<["-"], "masm-verbose">,
-  HelpText<"Generate verbose assembly output">;
 def mdebug_pass : Separate<["-"], "mdebug-pass">,
   HelpText<"Enable additional debug output">;
 def mframe_pointer_EQ : Joined<["-"], "mframe-pointer=">,
@@ -455,8 +453,6 @@ def fspell_checking_limit : Separate<["-"], "fspell-checking-limit">, MetaVarNam
 def fcaret_diagnostics_max_lines :
   Separate<["-"], "fcaret-diagnostics-max-lines">, MetaVarName<"<N>">,
   HelpText<"Set the maximum number of source lines to show in a caret diagnostic">;
-def fmessage_length : Separate<["-"], "fmessage-length">, MetaVarName<"<N>">,
-  HelpText<"Format message diagnostics so that they fit within N columns or fewer, when possible.">;
 def verify_EQ : CommaJoined<["-"], "verify=">, MetaVarName<"<prefixes>">,
   HelpText<"Verify diagnostic output using comment directives that start with"
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d65cf30a73a8..97128d623a13 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -918,7 +918,7 @@ def fdiagnostics_hotness_threshold_EQ : Joined<["-"], "fdiagnostics-hotness-thre
   Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<number>">,
   HelpText<"Prevent optimization remarks from being output if they do not have at least this profile count">;
 def fdiagnostics_show_option : Flag<["-"], "fdiagnostics-show-option">, Group<f_Group>,
-    Flags<[CC1Option]>, HelpText<"Print option name with mappable diagnostics">;
+    HelpText<"Print option name with mappable diagnostics">;
 def fdiagnostics_show_note_include_stack : Flag<["-"], "fdiagnostics-show-note-include-stack">,
     Group<f_Group>, Flags<[CC1Option]>, HelpText<"Display include stacks for diagnostic notes">;
 def fdiagnostics_format_EQ : Joined<["-"], "fdiagnostics-format=">, Group<f_Group>;
@@ -1383,7 +1383,8 @@ def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
                                 Group<f_Group>, Flags<[DriverOption, CoreOption]>;
 def fmerge_all_constants : Flag<["-"], "fmerge-all-constants">, Group<f_Group>,
   Flags<[CC1Option, CoreOption]>, HelpText<"Allow merging of constants">;
-def fmessage_length_EQ : Joined<["-"], "fmessage-length=">, Group<f_Group>;
+def fmessage_length_EQ : Joined<["-"], "fmessage-length=">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Format message diagnostics so that they fit within N columns">;
 def fms_extensions : Flag<["-"], "fms-extensions">, Group<f_Group>, Flags<[CC1Option, CoreOption]>,
   HelpText<"Accept some non-standard constructs supported by the Microsoft compiler">;
 def fms_compatibility : Flag<["-"], "fms-compatibility">, Group<f_Group>, Flags<[CC1Option, CoreOption]>,
@@ -1533,7 +1534,7 @@ def fno_cxx_modules : Flag <["-"], "fno-cxx-modules">, Group<f_Group>,
 def fno_diagnostics_fixit_info : Flag<["-"], "fno-diagnostics-fixit-info">, Group<f_Group>,
   Flags<[CC1Option]>, HelpText<"Do not include fixit information in diagnostics">;
 def fno_diagnostics_show_hotness : Flag<["-"],
"fno-diagnostics-show-hotness">, Group; -def fno_diagnostics_show_option : Flag<["-"], "fno-diagnostics-show-option">, Group; +def fno_diagnostics_show_option : Flag<["-"], "fno-diagnostics-show-option">, Group, Flags<[CC1Option]>; def fno_diagnostics_show_note_include_stack : Flag<["-"], "fno-diagnostics-show-note-include-stack">, Flags<[CC1Option]>, Group; def fdigraphs : Flag<["-"], "fdigraphs">, Group, Flags<[CC1Option]>, @@ -1626,7 +1627,7 @@ def fno_register_global_dtors_with_atexit : Flag<["-"], "fno-register-global-dto HelpText<"Don't use atexit or __cxa_atexit to register global destructors">; def fno_unit_at_a_time : Flag<["-"], "fno-unit-at-a-time">, Group; def fno_unwind_tables : Flag<["-"], "fno-unwind-tables">, Group; -def fno_verbose_asm : Flag<["-"], "fno-verbose-asm">, Group; +def fno_verbose_asm : Flag<["-"], "fno-verbose-asm">, Group, Flags<[CC1Option]>; def fno_working_directory : Flag<["-"], "fno-working-directory">, Group; def fno_wrapv : Flag<["-"], "fno-wrapv">, Group; def fno_zero_initialized_in_bss : Flag<["-"], "fno-zero-initialized-in-bss">, Group; @@ -1983,7 +1984,8 @@ def fuse_init_array : Flag<["-"], "fuse-init-array">, Group, def fno_use_init_array : Flag<["-"], "fno-use-init-array">, Group, Flags<[CC1Option]>, HelpText<"Don't use .init_array instead of .ctors">; def fno_var_tracking : Flag<["-"], "fno-var-tracking">, Group; -def fverbose_asm : Flag<["-"], "fverbose-asm">, Group; +def fverbose_asm : Flag<["-"], "fverbose-asm">, Group, + HelpText<"Generate verbose assembly output">; def dA : Flag<["-"], "dA">, Alias; def fvisibility_EQ : Joined<["-"], "fvisibility=">, Group, HelpText<"Set the default symbol visibility for all global declarations">, Values<"hidden,default">; diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index f3253d5b40e3..25476f78a6a0 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -59,8 +59,7 @@ class TargetOptions; /// report the error(s). 
 bool ParseDiagnosticArgs(DiagnosticOptions &Opts, llvm::opt::ArgList &Args,
                          DiagnosticsEngine *Diags = nullptr,
-                         bool DefaultDiagColor = true,
-                         bool DefaultShowOpt = true);
+                         bool DefaultDiagColor = true);

 class CompilerInvocationBase {
 public:
diff --git a/clang/include/clang/Frontend/FrontendAction.h b/clang/include/clang/Frontend/FrontendAction.h
index e994e24cf5af..c9f9f080c141 100644
--- a/clang/include/clang/Frontend/FrontendAction.h
+++ b/clang/include/clang/Frontend/FrontendAction.h
@@ -312,6 +312,7 @@ class WrapperFrontendAction : public FrontendAction {
   bool BeginSourceFileAction(CompilerInstance &CI) override;
   void ExecuteAction() override;
   void EndSourceFileAction() override;
+  bool shouldEraseOutputFiles() override;

 public:
   /// Construct a WrapperFrontendAction from an existing action, taking
diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h
index 89ac20075fa4..9ca2bfda2138 100644
--- a/clang/include/clang/Frontend/FrontendActions.h
+++ b/clang/include/clang/Frontend/FrontendActions.h
@@ -119,17 +119,13 @@ class GenerateModuleAction : public ASTFrontendAction {
   bool hasASTFileSupport() const override { return false; }
 };

-class GenerateInterfaceStubAction : public ASTFrontendAction {
-protected:
-  TranslationUnitKind getTranslationUnitKind() override { return TU_Module; }
-
-  bool hasASTFileSupport() const override { return false; }
-};
-
-class GenerateInterfaceIfsExpV1Action : public GenerateInterfaceStubAction {
+class GenerateInterfaceStubsAction : public ASTFrontendAction {
 protected:
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override;
+
+  TranslationUnitKind getTranslationUnitKind() override { return TU_Module; }
+  bool hasASTFileSupport() const override { return false; }
 };

 class GenerateModuleFromModuleMapAction : public GenerateModuleAction {
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 66fec6436a40..6069b5eea265 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -90,7 +90,7 @@ enum ActionKind {
   GeneratePCH,

   /// Generate Interface Stub Files.
-  GenerateInterfaceIfsExpV1,
+  GenerateInterfaceStubs,

   /// Only execute frontend initialization.
   InitOnly,
diff --git a/clang/lib/Analysis/RetainSummaryManager.cpp b/clang/lib/Analysis/RetainSummaryManager.cpp
index 00bc854a8804..9f45a8efe546 100644
--- a/clang/lib/Analysis/RetainSummaryManager.cpp
+++ b/clang/lib/Analysis/RetainSummaryManager.cpp
@@ -146,7 +146,9 @@ static bool isSubclass(const Decl *D,
 }

 static bool isOSObjectSubclass(const Decl *D) {
-  return D && isSubclass(D, "OSMetaClassBase");
+  // OSSymbols are particular OSObjects that are allocated globally
+  // and therefore aren't really refcounted, so we ignore them.
+ return D && isSubclass(D, "OSMetaClassBase") && !isSubclass(D, "OSSymbol"); } static bool isOSObjectDynamicCast(StringRef S) { diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index dd8f11101107..5fd7d304f8f4 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -659,7 +659,7 @@ void VisibleModuleSet::setVisible(Module *M, SourceLocation Loc, VisitModule({M, nullptr}); } -ASTSourceDescriptor::ASTSourceDescriptor(const Module &M) +ASTSourceDescriptor::ASTSourceDescriptor(Module &M) : Signature(M.Signature), ClangModule(&M) { if (M.Directory) Path = M.Directory->getName(); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 49c57e9860a6..6d3c2ad66cdc 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1817,9 +1817,10 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, if (auto *templateType = dyn_cast_or_null(TPList->getParam(i))) if (templateType->hasDefaultArgument()) - defaultParameter = + defaultParameter = llvm::APSInt::isSameValue( templateType->getDefaultArgument()->EvaluateKnownConstInt( - CGM.getContext()) == TA.getAsIntegral(); + CGM.getContext()), + TA.getAsIntegral()); TemplateParams.push_back(DBuilder.createTemplateValueParameter( TheCU, Name, TTy, defaultParameter, diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index db78309e9fd9..35b926808492 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -617,6 +617,13 @@ class CGObjCGNU : public CGObjCRuntime { llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) override; void GenerateProtocol(const ObjCProtocolDecl *PD) override; + + virtual llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD); + + llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) override { + return GenerateProtocolRef(PD); + } + llvm::Function *ModuleInitFunction() override; llvm::FunctionCallee GetPropertyGetFunction() override; llvm::FunctionCallee GetPropertySetFunction() override; @@ -1348,7 +1355,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { void GenerateProtocol(const ObjCProtocolDecl *PD) override { // Do nothing - we only emit referenced protocols. 
} - llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) { + llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) override { std::string ProtocolName = PD->getNameAsString(); auto *&Protocol = ExistingProtocols[ProtocolName]; if (Protocol) @@ -3039,13 +3046,18 @@ CGObjCGNU::GenerateProtocolList(ArrayRef Protocols) { llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) { + auto protocol = GenerateProtocolRef(PD); + llvm::Type *T = + CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); + return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); +} + +llvm::Constant *CGObjCGNU::GenerateProtocolRef(const ObjCProtocolDecl *PD) { llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()]; if (!protocol) GenerateProtocol(PD); assert(protocol && "Unknown protocol"); - llvm::Type *T = - CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); - return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); + return protocol; } llvm::Constant * diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 87fd51b5d8b1..3986310eaa70 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -1107,11 +1107,6 @@ class CGObjCCommonMac : public CodeGen::CGObjCRuntime { void GenerateProtocol(const ObjCProtocolDecl *PD) override; - /// GetOrEmitProtocol - Get the protocol object for the given - /// declaration, emitting it if necessary. The return value has type - /// ProtocolPtrTy. - virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD)=0; - /// GetOrEmitProtocolRef - Get a forward reference to the protocol /// object for the given declaration, emitting it if needed. These /// forward references will be filled in with empty bodies if no diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp index c34758c7e3b3..39efe040302d 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "CGObjCRuntime.h" -#include "CGCleanup.h" #include "CGCXXABI.h" +#include "CGCleanup.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "llvm/Support/SaveAndRestore.h" using namespace clang; @@ -383,3 +384,9 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo(); return MessageSendInfo(argsInfo, signatureType); } + +llvm::Constant * +clang::CodeGen::emitObjCProtocolObject(CodeGenModule &CGM, + const ObjCProtocolDecl *protocol) { + return CGM.getObjCRuntime().GetOrEmitProtocol(protocol); +} diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h index f0b3525cfde2..a2c189585f7b 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.h +++ b/clang/lib/CodeGen/CGObjCRuntime.h @@ -211,6 +211,11 @@ class CGObjCRuntime { /// implementations. virtual void GenerateProtocol(const ObjCProtocolDecl *OPD) = 0; + /// GetOrEmitProtocol - Get the protocol object for the given + /// declaration, emitting it if necessary. The return value has type + /// ProtocolPtrTy. 
+  virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) = 0;
+
   /// Generate a function preamble for a method with the specified
   /// types.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ae98433acb48..6642851a56bc 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5374,7 +5374,7 @@ std::pair CGOpenMPRuntime::emitDependClause(
     llvm::Value *Size;
     QualType Ty = E->getType();
     if (OASE) {
-      Size = llvm::ConstantInt::get(CGF.SizeTy,/*V=*/1);
+      Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
       for (const Expr *SE : OASE->getDimensions()) {
         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
@@ -7448,6 +7448,20 @@ class MappableExprsHandler {
   llvm::Value *getExprTypeSize(const Expr *E) const {
     QualType ExprTy = E->getType().getCanonicalType();
 
+    // Calculate the size for array shaping expression.
+    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
+      llvm::Value *Size =
+          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
+      for (const Expr *SE : OAE->getDimensions()) {
+        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
+        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
+                                      CGF.getContext().getSizeType(),
+                                      SE->getExprLoc());
+        Size = CGF.Builder.CreateNUWMul(Size, Sz);
+      }
+      return Size;
+    }
+
     // Reference types are ignored for mapping purposes.
     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
       ExprTy = RefTy->getPointeeType().getCanonicalType();
@@ -7779,6 +7793,7 @@ class MappableExprsHandler {
       const Expr *AssocExpr = I->getAssociatedExpression();
       const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
+      const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
 
       if (isa<CXXThisExpr>(AssocExpr)) {
         // The base is the 'this' pointer. The content of the pointer is going
@@ -7788,6 +7803,11 @@ class MappableExprsHandler {
                  (OASE && isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
         BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
+      } else if (OAShE &&
+                 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
+        BP = Address(
+            CGF.EmitScalarExpr(OAShE->getBase()),
+            CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
       } else {
         // The base is the reference to the variable.
         // BP = &Var.
@@ -7870,9 +7890,12 @@ class MappableExprsHandler {
       // types.
const auto *OASE = dyn_cast(I->getAssociatedExpression()); + const auto *OAShE = + dyn_cast(I->getAssociatedExpression()); const auto *UO = dyn_cast(I->getAssociatedExpression()); const auto *BO = dyn_cast(I->getAssociatedExpression()); bool IsPointer = + OAShE || (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) .getCanonicalType() ->isAnyPointerType()) || @@ -7890,8 +7913,15 @@ class MappableExprsHandler { isa(Next->getAssociatedExpression())) && "Unexpected expression"); - Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) - .getAddress(CGF); + Address LB = Address::invalid(); + if (OAShE) { + LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), + CGF.getContext().getTypeAlignInChars( + OAShE->getBase()->getType())); + } else { + LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(CGF); + } // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 1f06514a38c1..024fc068c217 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -848,55 +848,54 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, FD->getBody()->getStmtClass() == Stmt::CoroutineBodyStmtClass) SanOpts.Mask &= ~SanitizerKind::Null; - if (D) { - // Apply xray attributes to the function (as a string, for now) - if (const auto *XRayAttr = D->getAttr()) { - if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has( - XRayInstrKind::FunctionEntry) || - CGM.getCodeGenOpts().XRayInstrumentationBundle.has( - XRayInstrKind::FunctionExit)) { - if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction()) - Fn->addFnAttr("function-instrument", "xray-always"); - if (XRayAttr->neverXRayInstrument()) - Fn->addFnAttr("function-instrument", "xray-never"); - if (const auto *LogArgs = D->getAttr()) - if (ShouldXRayInstrumentFunction()) - Fn->addFnAttr("xray-log-args", - llvm::utostr(LogArgs->getArgumentCount())); - } - } else { - if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc)) - Fn->addFnAttr( - "xray-instruction-threshold", - llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + // Apply xray attributes to the function (as a string, for now) + if (const auto *XRayAttr = D ? 
D->getAttr() : nullptr) { + if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionEntry) || + CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionExit)) { + if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction()) + Fn->addFnAttr("function-instrument", "xray-always"); + if (XRayAttr->neverXRayInstrument()) + Fn->addFnAttr("function-instrument", "xray-never"); + if (const auto *LogArgs = D->getAttr()) + if (ShouldXRayInstrumentFunction()) + Fn->addFnAttr("xray-log-args", + llvm::utostr(LogArgs->getArgumentCount())); } + } else { + if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc)) + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + } - if (ShouldXRayInstrumentFunction()) { - if (CGM.getCodeGenOpts().XRayIgnoreLoops) - Fn->addFnAttr("xray-ignore-loops"); + if (ShouldXRayInstrumentFunction()) { + if (CGM.getCodeGenOpts().XRayIgnoreLoops) + Fn->addFnAttr("xray-ignore-loops"); - if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( - XRayInstrKind::FunctionExit)) - Fn->addFnAttr("xray-skip-exit"); + if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionExit)) + Fn->addFnAttr("xray-skip-exit"); - if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( - XRayInstrKind::FunctionEntry)) - Fn->addFnAttr("xray-skip-entry"); - } + if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionEntry)) + Fn->addFnAttr("xray-skip-entry"); + } - unsigned Count, Offset; - if (const auto *Attr = D->getAttr()) { - Count = Attr->getCount(); - Offset = Attr->getOffset(); - } else { - Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount; - Offset = CGM.getCodeGenOpts().PatchableFunctionEntryOffset; - } - if (Count && Offset <= Count) { - Fn->addFnAttr("patchable-function-entry", std::to_string(Count - Offset)); - if (Offset) - Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset)); - } + unsigned Count, Offset; + if (const auto *Attr = + D ? D->getAttr() : nullptr) { + Count = Attr->getCount(); + Offset = Attr->getOffset(); + } else { + Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount; + Offset = CGM.getCodeGenOpts().PatchableFunctionEntryOffset; + } + if (Count && Offset <= Count) { + Fn->addFnAttr("patchable-function-entry", std::to_string(Count - Offset)); + if (Offset) + Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset)); } // Add no-jump-tables value. diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 6a0f82a9f9a0..caa71b10f231 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -609,7 +609,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { llvm::Type *PointeeType = ConvertTypeForMem(ETy); if (PointeeType->isVoidTy()) PointeeType = llvm::Type::getInt8Ty(getLLVMContext()); - unsigned AS = Context.getTargetAddressSpace(ETy); + + unsigned AS = PointeeType->isFunctionTy() + ? 
getDataLayout().getProgramAddressSpace() + : Context.getTargetAddressSpace(ETy); + ResultType = llvm::PointerType::get(PointeeType, AS); break; } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 06e4686ac2b9..2cec0dc9de22 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -103,6 +103,18 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, return DAL; } +bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( + llvm::AMDGPU::GPUKind Kind) { + const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); + + // Default to enabling f32 denormals by default on subtargets where fma is + // fast with denormals + const bool BothDenormAndFMAFast = + (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && + (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); + return !BothDenormAndFMAFast; +} + llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind, const llvm::fltSemantics *FPType) const { @@ -121,18 +133,10 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); - // Default to enabling f32 denormals by default on subtargets where fma is - // fast with denormals - - const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); - const bool DefaultDenormsAreZeroForTarget = - (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && - (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); - // TODO: There are way too many flags that change this. Do we need to check // them all? bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || - !DefaultDenormsAreZeroForTarget; + getDefaultDenormsAreZeroForTarget(Kind); // Outputs are flushed to zero, preserving sign return DAZ ? llvm::DenormalMode::getPreserveSign() : llvm::DenormalMode::getIEEE(); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 78c40580b302..e7a873efb008 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -13,6 +13,8 @@ #include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" +#include "llvm/Support/TargetParser.h" + #include namespace clang { @@ -67,6 +69,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override; + /// Return whether denormals should be flushed, and treated as 0 by default + /// for the subtarget. + static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind); + llvm::DenormalMode getDefaultDenormalModeForType( const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ce850a8d3ef8..603d04f0a9b3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3579,9 +3579,9 @@ static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args, CmdArgs.push_back("-fno-diagnostics-fixit-info"); // Enable -fdiagnostics-show-option by default. 
- if (Args.hasFlag(options::OPT_fdiagnostics_show_option, - options::OPT_fno_diagnostics_show_option)) - CmdArgs.push_back("-fdiagnostics-show-option"); + if (!Args.hasFlag(options::OPT_fdiagnostics_show_option, + options::OPT_fno_diagnostics_show_option, true)) + CmdArgs.push_back("-fno-diagnostics-show-option"); if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) { @@ -4273,7 +4273,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, StringRef ArgStr = Args.hasArg(options::OPT_interface_stub_version_EQ) ? Args.getLastArgValue(options::OPT_interface_stub_version_EQ) - : "experimental-ifs-v1"; + : "experimental-ifs-v2"; CmdArgs.push_back("-emit-interface-stubs"); CmdArgs.push_back( Args.MakeArgString(Twine("-interface-stub-version=") + ArgStr.str())); @@ -4733,9 +4733,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Decide whether to use verbose asm. Verbose assembly is the default on // toolchains which have the integrated assembler on by default. bool IsIntegratedAssemblerDefault = TC.IsIntegratedAssemblerDefault(); - if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, - IsIntegratedAssemblerDefault)) - CmdArgs.push_back("-masm-verbose"); + if (!Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, + IsIntegratedAssemblerDefault)) + CmdArgs.push_back("-fno-verbose-asm"); if (!TC.useIntegratedAs()) CmdArgs.push_back("-no-integrated-as"); @@ -5220,15 +5220,20 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } // Pass -fmessage-length=. - CmdArgs.push_back("-fmessage-length"); + unsigned MessageLength = 0; if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) { - CmdArgs.push_back(A->getValue()); + StringRef V(A->getValue()); + if (V.getAsInteger(0, MessageLength)) + D.Diag(diag::err_drv_invalid_argument_to_option) + << V << A->getOption().getName(); } else { // If -fmessage-length=N was not specified, determine whether this is a // terminal and, if so, implicitly define -fmessage-length appropriately. - unsigned N = llvm::sys::Process::StandardErrColumns(); - CmdArgs.push_back(Args.MakeArgString(Twine(N))); + MessageLength = llvm::sys::Process::StandardErrColumns(); } + if (MessageLength != 0) + CmdArgs.push_back( + Args.MakeArgString("-fmessage-length=" + Twine(MessageLength))); // -fvisibility= and -fvisibility-ms-compat are of a piece. if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ, @@ -5701,7 +5706,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fexperimental_new_pass_manager, options::OPT_fno_experimental_new_pass_manager); - ObjCRuntime Runtime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind); + ObjCRuntime Runtime = AddObjCRuntimeArgs(Args, Inputs, CmdArgs, rewriteKind); RenderObjCOptions(TC, D, RawTriple, Args, Runtime, rewriteKind != RK_None, Input, CmdArgs); @@ -6408,6 +6413,7 @@ Clang::~Clang() {} /// /// Returns true if the runtime is non-fragile. ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args, + const InputInfoList &inputs, ArgStringList &cmdArgs, RewriteKind rewriteKind) const { // Look for the controlling runtime option. 
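The next hunk makes the driver emit -fobjc-runtime= only when at least one compiler input is actually an Objective-C source. A minimal self-contained sketch of that guard, assuming a stand-in FileType enum and isObjC() helper in place of the driver's real InputInfoList and types::isObjC() (the hard-coded runtime string is likewise illustrative, not the driver's computed value):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for the driver's input list entry.
enum class FileType { C, CXX, ObjC, ObjCXX };

static bool isObjC(FileType T) {
  // Mirrors the idea of types::isObjC(): ObjC and ObjC++ inputs count.
  return T == FileType::ObjC || T == FileType::ObjCXX;
}

int main() {
  std::vector<FileType> Inputs = {FileType::C, FileType::ObjC};
  std::vector<std::string> CmdArgs;
  // Append the flag only if some input is Objective-C, like the
  // llvm::any_of guard added to Clang::AddObjCRuntimeArgs below.
  if (std::any_of(Inputs.begin(), Inputs.end(),
                  [](FileType T) { return isObjC(T); }))
    CmdArgs.push_back("-fobjc-runtime=macosx"); // value is illustrative
  for (const std::string &A : CmdArgs)
    std::cout << A << '\n';
  return 0;
}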
@@ -6531,8 +6537,11 @@ ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args, runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple()); } - cmdArgs.push_back( - args.MakeArgString("-fobjc-runtime=" + runtime.getAsString())); + if (llvm::any_of(inputs, [](const InputInfo &input) { + return types::isObjC(input.getType()); + })) + cmdArgs.push_back( + args.MakeArgString("-fobjc-runtime=" + runtime.getAsString())); return runtime; } diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 1552515c1461..64af2fdd5115 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -77,6 +77,7 @@ class LLVM_LIBRARY_VISIBILITY Clang : public Tool { enum RewriteKind { RK_None, RK_Fragile, RK_NonFragile }; ObjCRuntime AddObjCRuntimeArgs(const llvm::opt::ArgList &args, + const InputInfoList &inputs, llvm::opt::ArgStringList &cmdArgs, RewriteKind rewrite) const; diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 157dca7e0c8d..e4ace81dbac7 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "HIP.h" +#include "AMDGPU.h" #include "CommonArgs.h" #include "InputInfo.h" #include "clang/Basic/Cuda.h" @@ -16,6 +17,7 @@ #include "clang/Driver/Options.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TargetParser.h" using namespace clang::driver; using namespace clang::driver::toolchains; @@ -266,7 +268,7 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const ArgList &Args) - : ToolChain(D, Triple, Args), HostTC(HostTC) { + : AMDGPUToolChain(D, Triple, Args), HostTC(HostTC) { // Lookup binaries into the driver directory, this is used to // discover the clang-offload-bundler executable. getProgramPaths().push_back(getDriver().Dir); @@ -283,6 +285,7 @@ void HIPToolChain::addClangTargetOptions( (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); + auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); CC1Args.push_back("-target-cpu"); CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); @@ -343,11 +346,14 @@ void HIPToolChain::addClangTargetOptions( std::string GFXVersion = GpuArch.drop_front(3).str(); std::string ISAVerBC = "oclc_isa_version_" + GFXVersion + ".amdgcn.bc"; - llvm::StringRef FlushDenormalControlBC; - if (DriverArgs.hasArg(options::OPT_fcuda_flush_denormals_to_zero)) - FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc"; - else - FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc"; + bool FTZDAZ = DriverArgs.hasFlag( + options::OPT_fcuda_flush_denormals_to_zero, + options::OPT_fno_cuda_flush_denormals_to_zero, + getDefaultDenormsAreZeroForTarget(Kind)); + + std::string FlushDenormalControlBC = FTZDAZ ? 
+ "oclc_daz_opt_on.amdgcn.bc" : + "oclc_daz_opt_off.amdgcn.bc"; llvm::StringRef WaveFrontSizeBC; if (stoi(GFXVersion) < 1000) @@ -357,7 +363,7 @@ void HIPToolChain::addClangTargetOptions( BCLibs.append({"hip.amdgcn.bc", "ocml.amdgcn.bc", "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", - std::string(FlushDenormalControlBC), + FlushDenormalControlBC, "oclc_correctly_rounded_sqrt_on.amdgcn.bc", "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC, std::string(WaveFrontSizeBC)}); diff --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h index c4f944e458bf..b6a3a2718635 100644 --- a/clang/lib/Driver/ToolChains/HIP.h +++ b/clang/lib/Driver/ToolChains/HIP.h @@ -11,6 +11,7 @@ #include "clang/Driver/ToolChain.h" #include "clang/Driver/Tool.h" +#include "AMDGPU.h" namespace clang { namespace driver { @@ -72,7 +73,7 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool { namespace toolchains { -class LLVM_LIBRARY_VISIBILITY HIPToolChain : public ToolChain { +class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public AMDGPUToolChain { public: HIPToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, const llvm::opt::ArgList &Args); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0ff7b179b653..42eb121a8849 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -859,7 +859,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.CoverageMapping = Args.hasFlag(OPT_fcoverage_mapping, OPT_fno_coverage_mapping, false); Opts.DumpCoverageMapping = Args.hasArg(OPT_dump_coverage_mapping); - Opts.AsmVerbose = Args.hasArg(OPT_masm_verbose); + Opts.AsmVerbose = !Args.hasArg(OPT_fno_verbose_asm); Opts.PreserveAsmComments = !Args.hasArg(OPT_fno_preserve_as_comments); Opts.AssumeSaneOperatorNew = !Args.hasArg(OPT_fno_assume_sane_operator_new); Opts.ObjCAutoRefCountExceptions = Args.hasArg(OPT_fobjc_arc_exceptions); @@ -1553,7 +1553,7 @@ static bool checkVerifyPrefixes(const std::vector &VerifyPrefixes, bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, DiagnosticsEngine *Diags, - bool DefaultDiagColor, bool DefaultShowOpt) { + bool DefaultDiagColor) { bool Success = true; Opts.DiagnosticLogFile = @@ -1571,9 +1571,7 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, Opts.ShowFixits = !Args.hasArg(OPT_fno_diagnostics_fixit_info); Opts.ShowLocation = !Args.hasArg(OPT_fno_show_source_location); Opts.AbsolutePath = Args.hasArg(OPT_fdiagnostics_absolute_paths); - Opts.ShowOptionNames = - Args.hasFlag(OPT_fdiagnostics_show_option, - OPT_fno_diagnostics_show_option, DefaultShowOpt); + Opts.ShowOptionNames = !Args.hasArg(OPT_fno_diagnostics_show_option); llvm::sys::Process::UseANSIEscapeCodes(Args.hasArg(OPT_fansi_escape_codes)); @@ -1681,7 +1679,8 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, Diags->Report(diag::warn_ignoring_ftabstop_value) << Opts.TabStop << DiagnosticOptions::DefaultTabStop; } - Opts.MessageLength = getLastArgIntValue(Args, OPT_fmessage_length, 0, Diags); + Opts.MessageLength = + getLastArgIntValue(Args, OPT_fmessage_length_EQ, 0, Diags); addDiagnosticArgs(Args, OPT_W_Group, OPT_W_value_Group, Opts.Warnings); addDiagnosticArgs(Args, OPT_R_Group, OPT_R_value_Group, Opts.Remarks); @@ -1787,25 +1786,26 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, StringRef ArgStr = Args.hasArg(OPT_interface_stub_version_EQ) ? 
Args.getLastArgValue(OPT_interface_stub_version_EQ) - : "experimental-ifs-v1"; + : "experimental-ifs-v2"; if (ArgStr == "experimental-yaml-elf-v1" || + ArgStr == "experimental-ifs-v1" || ArgStr == "experimental-tapi-elf-v1") { std::string ErrorMessage = "Invalid interface stub format: " + ArgStr.str() + " is deprecated."; Diags.Report(diag::err_drv_invalid_value) << "Must specify a valid interface stub format type, ie: " - "-interface-stub-version=experimental-ifs-v1" + "-interface-stub-version=experimental-ifs-v2" << ErrorMessage; - } else if (ArgStr != "experimental-ifs-v1") { + } else if (!ArgStr.startswith("experimental-ifs-")) { std::string ErrorMessage = "Invalid interface stub format: " + ArgStr.str() + "."; Diags.Report(diag::err_drv_invalid_value) << "Must specify a valid interface stub format type, ie: " - "-interface-stub-version=experimental-ifs-v1" + "-interface-stub-version=experimental-ifs-v2" << ErrorMessage; } else { - Opts.ProgramAction = frontend::GenerateInterfaceIfsExpV1; + Opts.ProgramAction = frontend::GenerateInterfaceStubs; } break; } @@ -3385,7 +3385,7 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { case frontend::GenerateModuleInterface: case frontend::GenerateHeaderModule: case frontend::GeneratePCH: - case frontend::GenerateInterfaceIfsExpV1: + case frontend::GenerateInterfaceStubs: case frontend::ParseSyntaxOnly: case frontend::ModuleFileInfo: case frontend::VerifyPCH: @@ -3613,9 +3613,8 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res, Diags.Report(diag::err_fe_dependency_file_requires_MT); Success = false; } - Success &= - ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags, - false /*DefaultDiagColor*/, false /*DefaultShowOpt*/); + Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags, + /*DefaultDiagColor=*/false); ParseCommentArgs(LangOpts.CommentOpts, Args); ParseFileSystemArgs(Res.getFileSystemOpts(), Args); // FIXME: We shouldn't have to pass the DashX option around here diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 1dc85d967ca0..0155238dd0a8 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -1081,6 +1081,9 @@ void WrapperFrontendAction::ExecuteAction() { void WrapperFrontendAction::EndSourceFileAction() { WrappedAction->EndSourceFileAction(); } +bool WrapperFrontendAction::shouldEraseOutputFiles() { + return WrappedAction->shouldEraseOutputFiles(); +} bool WrapperFrontendAction::usesPreprocessorOnly() const { return WrappedAction->usesPreprocessorOnly(); diff --git a/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp b/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp index 2b7f0f8f9b66..b7c1e693413b 100644 --- a/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp +++ b/clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp @@ -290,7 +290,7 @@ class InterfaceStubFunctionsConsumer : public ASTConsumer { const ASTContext &context, StringRef Format, raw_ostream &OS) -> void { OS << "--- !" << Format << "\n"; - OS << "IfsVersion: 1.0\n"; + OS << "IfsVersion: 2.0\n"; OS << "Triple: " << T.str() << "\n"; OS << "ObjectFileFormat: " << "ELF" @@ -299,11 +299,11 @@ class InterfaceStubFunctionsConsumer : public ASTConsumer { for (const auto &E : Symbols) { const MangledSymbol &Symbol = E.second; for (auto Name : Symbol.Names) { - OS << " \"" + OS << " - { Name: \"" << (Symbol.ParentName.empty() || Instance.getLangOpts().CPlusPlus ? 
"" : (Symbol.ParentName + ".")) - << Name << "\" : { Type: "; + << Name << "\", Type: "; switch (Symbol.Type) { default: llvm_unreachable( @@ -330,15 +330,15 @@ class InterfaceStubFunctionsConsumer : public ASTConsumer { OS.flush(); }; - assert(Format == "experimental-ifs-v1" && "Unexpected IFS Format."); + assert(Format == "experimental-ifs-v2" && "Unexpected IFS Format."); writeIfsV1(Instance.getTarget().getTriple(), Symbols, context, Format, *OS); } }; } // namespace std::unique_ptr -GenerateInterfaceIfsExpV1Action::CreateASTConsumer(CompilerInstance &CI, - StringRef InFile) { +GenerateInterfaceStubsAction::CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { return std::make_unique( - CI, InFile, "experimental-ifs-v1"); + CI, InFile, "experimental-ifs-v2"); } diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index ab7a1e32e301..7c59ae42d2a2 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -65,8 +65,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) { case GenerateHeaderModule: return std::make_unique(); case GeneratePCH: return std::make_unique(); - case GenerateInterfaceIfsExpV1: - return std::make_unique(); + case GenerateInterfaceStubs: + return std::make_unique(); case InitOnly: return std::make_unique(); case ParseSyntaxOnly: return std::make_unique(); case ModuleFileInfo: return std::make_unique(); diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h index 3b30ddbd527b..c2c57cadfdf2 100644 --- a/clang/lib/Headers/wasm_simd128.h +++ b/clang/lib/Headers/wasm_simd128.h @@ -650,28 +650,28 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_mul(v128_t __a, return (v128_t)((__u8x16)__a * (__u8x16)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min_s(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_min_s_i8x16((__i8x16)__a, (__i8x16)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min_u(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_min(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_min_u_i8x16((__i8x16)__a, (__i8x16)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max_s(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_max_s_i8x16((__i8x16)__a, (__i8x16)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max_u(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_max(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_max_u_i8x16((__i8x16)__a, (__i8x16)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_avgr_u(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_avgr(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_avgr_u_i8x16((__i8x16)__a, (__i8x16)__b); } @@ -745,28 +745,28 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t __a, return (v128_t)((__u16x8)__a * (__u16x8)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min_s(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_min_s_i16x8((__i16x8)__a, (__i16x8)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min_u(v128_t __a, 
- v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_min(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_min_u_i16x8((__i16x8)__a, (__i16x8)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max_s(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_max_s_i16x8((__i16x8)__a, (__i16x8)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max_u(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_max(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_max_u_i16x8((__i16x8)__a, (__i16x8)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_avgr_u(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_avgr(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_avgr_u_i16x8((__i16x8)__a, (__i16x8)__b); } @@ -816,23 +816,23 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t __a, return (v128_t)((__u32x4)__a * (__u32x4)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min_s(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_min_s_i32x4((__i32x4)__a, (__i32x4)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min_u(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_min(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_min_u_i32x4((__i32x4)__a, (__i32x4)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max_s(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_max_s_i32x4((__i32x4)__a, (__i32x4)__b); } -static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max_u(v128_t __a, - v128_t __b) { +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_max(v128_t __a, + v128_t __b) { return (v128_t)__builtin_wasm_max_u_i32x4((__i32x4)__a, (__i32x4)__b); } diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 9a0f94ae7be5..fc07cfe5ad03 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3001,7 +3001,7 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, // Match the ')'. T.consumeClose(); RParenLoc = T.getCloseLocation(); - Result = Actions.CorrectDelayedTyposInExpr(ParseExpression()); + Result = Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()); if (ErrorFound) { Result = ExprError(); } else if (!Result.isInvalid()) { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index ff11e97c5783..f4b823768d4c 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1522,48 +1522,65 @@ class DeferredDiagnosticsEmitter } void visitUsedDecl(SourceLocation Loc, Decl *D) { - if (auto *FD = dyn_cast(D)) { - FunctionDecl *Caller = UseStack.empty() ? nullptr : UseStack.back(); - auto IsKnownEmitted = S.getEmissionStatus(FD, /*Final=*/true) == - Sema::FunctionEmissionStatus::Emitted; - if (!Caller) - ShouldEmit = IsKnownEmitted; - if ((!ShouldEmit && !S.getLangOpts().OpenMP && !Caller) || - S.shouldIgnoreInHostDeviceCheck(FD) || Visited.count(D)) - return; - // Finalize analysis of OpenMP-specific constructs. - if (Caller && S.LangOpts.OpenMP && UseStack.size() == 1) - S.finalizeOpenMPDelayedAnalysis(Caller, FD, Loc); - // Finalize analysis of SYCL-specific constructs. 
-      if (Caller && S.LangOpts.SYCLIsDevice)
-        S.finalizeSYCLDelayedAnalysis(Caller, FD, Loc);
-      if (Caller)
-        S.DeviceKnownEmittedFns[FD] = {Caller, Loc};
-      if (ShouldEmit || InOMPDeviceContext)
-        S.emitDeferredDiags(FD, Caller);
-      Visited.insert(D);
-      UseStack.push_back(FD);
-      if (auto *S = FD->getBody()) {
-        this->Visit(S);
-      }
-      UseStack.pop_back();
-      Visited.erase(D);
-    } else if (auto *VD = dyn_cast<VarDecl>(D)) {
-      if (auto *Init = VD->getInit()) {
-        if (S.LangOpts.SYCLIsDevice)
-          return;
-        auto DevTy = OMPDeclareTargetDeclAttr::getDeviceType(VD);
-        bool IsDev = DevTy && (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost ||
-                               *DevTy == OMPDeclareTargetDeclAttr::DT_Any);
-        if (IsDev)
-          ++InOMPDeviceContext;
-        this->Visit(Init);
-        if (IsDev)
-          --InOMPDeviceContext;
-      }
-    } else
+    if (isa<VarDecl>(D))
+      return;
+    if (auto *FD = dyn_cast<FunctionDecl>(D))
+      checkFunc(Loc, FD);
+    else
       Inherited::visitUsedDecl(Loc, D);
   }
+
+  void checkVar(VarDecl *VD) {
+    if (S.LangOpts.SYCLIsDevice)
+      return;
+    assert(VD->isFileVarDecl() &&
+           "Should only check file-scope variables");
+    if (auto *Init = VD->getInit()) {
+      auto DevTy = OMPDeclareTargetDeclAttr::getDeviceType(VD);
+      bool IsDev = DevTy && (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost ||
+                             *DevTy == OMPDeclareTargetDeclAttr::DT_Any);
+      if (IsDev)
+        ++InOMPDeviceContext;
+      this->Visit(Init);
+      if (IsDev)
+        --InOMPDeviceContext;
+    }
+  }
+
+  void checkFunc(SourceLocation Loc, FunctionDecl *FD) {
+    FunctionDecl *Caller = UseStack.empty() ? nullptr : UseStack.back();
+    auto IsKnownEmitted = S.getEmissionStatus(FD, /*Final=*/true) ==
+                          Sema::FunctionEmissionStatus::Emitted;
+    if (!Caller)
+      ShouldEmit = IsKnownEmitted;
+    if ((!ShouldEmit && !S.getLangOpts().OpenMP && !Caller) ||
+        S.shouldIgnoreInHostDeviceCheck(FD) || Visited.count(FD))
+      return;
+    // Finalize analysis of OpenMP-specific constructs.
+    if (Caller && S.LangOpts.OpenMP && UseStack.size() == 1)
+      S.finalizeOpenMPDelayedAnalysis(Caller, FD, Loc);
+    // Finalize analysis of SYCL-specific constructs.
+    if (Caller && S.LangOpts.SYCLIsDevice)
+      S.finalizeSYCLDelayedAnalysis(Caller, FD, Loc);
+    if (Caller)
+      S.DeviceKnownEmittedFns[FD] = {Caller, Loc};
+    if (ShouldEmit || InOMPDeviceContext)
+      S.emitDeferredDiags(FD, Caller);
+    Visited.insert(FD);
+    UseStack.push_back(FD);
+    if (auto *S = FD->getBody()) {
+      this->Visit(S);
+    }
+    UseStack.pop_back();
+    Visited.erase(FD);
+  }
+
+  void checkRecordedDecl(Decl *D) {
+    if (auto *FD = dyn_cast<FunctionDecl>(D))
+      checkFunc(SourceLocation(), FD);
+    else
+      checkVar(cast<VarDecl>(D));
+  }
 };
 } // namespace
@@ -1579,7 +1596,7 @@ void Sema::emitDeferredDiags() {
 
   DeferredDiagnosticsEmitter DDE(*this);
   for (auto D : DeclsToCheckForDeferredDiags)
-    DDE.visitUsedDecl(SourceLocation(), D);
+    DDE.checkRecordedDecl(D);
 }
 
 // In CUDA, there are some constructs which may appear in semantically-valid
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 74e65eaacfdb..98de799415a0 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12270,7 +12270,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
     VDecl->setInitStyle(VarDecl::ListInit);
   }
 
-  if (LangOpts.OpenMP && VDecl->hasGlobalStorage())
+  if (LangOpts.OpenMP && VDecl->isFileVarDecl())
     DeclsToCheckForDeferredDiags.push_back(VDecl);
   CheckCompleteVariableDeclaration(VDecl);
 }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8f13d8a58577..19532dad6de2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4835,10 +4835,13 @@ ExprResult Sema::ActOnOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc,
   if (!BaseTy->isPointerType() && Base->isTypeDependent())
     return OMPArrayShapingExpr::Create(Context, Context.DependentTy, Base,
                                        LParenLoc, RParenLoc, Dims, Brackets);
-  if (!BaseTy->isPointerType())
+  if (!BaseTy->isPointerType() ||
+      (!Base->isTypeDependent() &&
+       BaseTy->getPointeeType()->isIncompleteType()))
     return ExprError(Diag(Base->getExprLoc(),
                           diag::err_omp_non_pointer_type_array_shaping_base)
                      << Base->getSourceRange());
+
   SmallVector<Expr *, 4> NewDims;
   bool ErrorFound = false;
   for (Expr *Dim : Dims) {
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index f9e8e3d6ccc8..7d2ae172fe4d 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1943,7 +1943,8 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level,
           if (isa<ArraySubscriptExpr>(EI->getAssociatedExpression()) ||
               isa<OMPArraySectionExpr>(EI->getAssociatedExpression()) ||
-              isa(EI->getAssociatedExpression())) {
+              isa(EI->getAssociatedExpression()) ||
+              isa<OMPArrayShapingExpr>(EI->getAssociatedExpression())) {
             IsVariableAssociatedWithSection = true;
             // There is nothing more we need to know about this variable.
             return true;
@@ -3225,7 +3226,7 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
                     StackComponents, OpenMPClauseKind) {
                   // Variable is used if it has been marked as an array, array
-                  // section or the variable iself.
+                  // section, array shaping or the variable itself.
                   return StackComponents.size() == 1 ||
                          std::all_of(
                              std::next(StackComponents.rbegin()),
@@ -3236,6 +3237,8 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
                                           nullptr &&
                                       (isa<OMPArraySectionExpr>(
                                            MC.getAssociatedExpression()) ||
+                                       isa<OMPArrayShapingExpr>(
+                                           MC.getAssociatedExpression()) ||
                                        isa<ArraySubscriptExpr>(
                                            MC.getAssociatedExpression()));
                             });
@@ -3393,8 +3396,10 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
             // Do both expressions have the same kind?
             if (CCI->getAssociatedExpression()->getStmtClass() !=
                 SC.getAssociatedExpression()->getStmtClass())
-              if (!(isa<OMPArraySectionExpr>(
-                        SC.getAssociatedExpression()) &&
+              if (!((isa<OMPArraySectionExpr>(
+                         SC.getAssociatedExpression()) ||
+                     isa<OMPArrayShapingExpr>(
+                         SC.getAssociatedExpression())) &&
                     isa<ArraySubscriptExpr>(
                         CCI->getAssociatedExpression())))
                 return false;
@@ -16284,6 +16289,15 @@ class MapBaseChecker final : public StmtVisitor<MapBaseChecker, bool> {
     Components.emplace_back(OASE, nullptr);
     return RelevantExpr || Visit(E);
   }
+  bool VisitOMPArrayShapingExpr(OMPArrayShapingExpr *E) {
+    Expr *Base = E->getBase();
+
+    // Record the component - we don't have any declaration associated.
+    Components.emplace_back(E, nullptr);
+
+    return Visit(Base->IgnoreParenImpCasts());
+  }
+
   bool VisitUnaryOperator(UnaryOperator *UO) {
     if (SemaRef.getLangOpts().OpenMP < 50 || !UO->isLValue() ||
         UO->getOpcode() != UO_Deref) {
@@ -16409,9 +16423,11 @@ static bool checkMapConflicts(
         // variable in map clauses of the same construct.
         if (CurrentRegionOnly &&
             (isa<ArraySubscriptExpr>(CI->getAssociatedExpression()) ||
-             isa<OMPArraySectionExpr>(CI->getAssociatedExpression())) &&
+             isa<OMPArraySectionExpr>(CI->getAssociatedExpression()) ||
+             isa<OMPArrayShapingExpr>(CI->getAssociatedExpression())) &&
            (isa<ArraySubscriptExpr>(SI->getAssociatedExpression()) ||
-             isa<OMPArraySectionExpr>(SI->getAssociatedExpression()))) {
+             isa<OMPArraySectionExpr>(SI->getAssociatedExpression()) ||
+             isa<OMPArrayShapingExpr>(SI->getAssociatedExpression()))) {
           SemaRef.Diag(CI->getAssociatedExpression()->getExprLoc(),
                        diag::err_omp_multiple_array_items_in_map_clause)
               << CI->getAssociatedExpression()->getSourceRange();
@@ -16443,6 +16459,9 @@ static bool checkMapConflicts(
           const Expr *E = OASE->getBase()->IgnoreParenImpCasts();
           Type =
               OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType();
+        } else if (const auto *OASE = dyn_cast<OMPArrayShapingExpr>(
+                       SI->getAssociatedExpression())) {
+          Type = OASE->getBase()->getType()->getPointeeType();
         }
         if (Type.isNull() || Type->isAnyPointerType() ||
             checkArrayExpressionDoesNotReferToWholeSize(
@@ -16905,6 +16924,7 @@ static void checkMappableExpressionList(
     QualType Type;
     auto *ASE = dyn_cast<ArraySubscriptExpr>(VE->IgnoreParens());
     auto *OASE = dyn_cast<OMPArraySectionExpr>(VE->IgnoreParens());
+    auto *OAShE = dyn_cast<OMPArrayShapingExpr>(VE->IgnoreParens());
     if (ASE) {
       Type = ASE->getType().getNonReferenceType();
     } else if (OASE) {
@@ -16915,6 +16935,8 @@ static void checkMappableExpressionList(
       else
         Type = BaseType->getPointeeType();
       Type = Type.getNonReferenceType();
+    } else if (OAShE) {
+      Type = OAShE->getBase()->getType()->getPointeeType();
     } else {
       Type = VE->getType();
     }
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 7437f649a090..bea9bdd22bab 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -8514,7 +8514,7 @@ unsigned ASTReader::getModuleFileID(ModuleFile *F) {
 
 llvm::Optional<ASTSourceDescriptor>
 ASTReader::getSourceDescriptor(unsigned ID) {
-  if (const Module *M = getSubmodule(ID))
+  if (Module *M = getSubmodule(ID))
     return ASTSourceDescriptor(*M);
 
   // If there is only a single PCH, return it instead.
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index 002233e49bb0..d869796b82c1 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -57,6 +57,11 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
     }
   }
 
+  // Errors that do not prevent the PCH from being written should not cause the
+  // overall compilation to fail either.
+  if (AllowASTWithErrors)
+    PP.getDiagnostics().getClient()->clear();
+
   // Emit the PCH file to the Buffer.
assert(SemaPtr && "No Sema?"); Buffer->Signature = diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index e4b720df6b11..0b8d100992a2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -508,13 +508,7 @@ void NullabilityChecker::checkEvent(ImplicitNullDerefEvent Event) const { /// return expressions of ObjC types when the return type of the function or /// method is non-null but the express is not. static const Expr *lookThroughImplicitCasts(const Expr *E) { - assert(E); - - while (auto *ICE = dyn_cast(E)) { - E = ICE->getSubExpr(); - } - - return E; + return E->IgnoreImpCasts(); } /// This method check when nullable pointer or null value is returned from a diff --git a/clang/test/Analysis/osobject-retain-release.cpp b/clang/test/Analysis/osobject-retain-release.cpp index 41606a30c39f..d88349dcd807 100644 --- a/clang/test/Analysis/osobject-retain-release.cpp +++ b/clang/test/Analysis/osobject-retain-release.cpp @@ -53,6 +53,9 @@ struct MyArray : public OSArray { OSObject *generateObject(OSObject *input) override; }; +// These are never refcounted. +struct OSSymbol : OSObject {}; + struct OtherStruct { static void doNothingToArray(OSArray *array); OtherStruct(OSArray *arr); @@ -754,3 +757,10 @@ void test() { b(0); } } // namespace inherited_constructor_crash + +namespace ossymbol_suppression { +OSSymbol *createSymbol(); +void test() { + OSSymbol *sym = createSymbol(); // no-warning +} +} // namespace ossymbol_suppression diff --git a/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1-cxx11.cpp b/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1-cxx11.cpp index f812ea1bd8be..1afea99e8895 100644 --- a/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1-cxx11.cpp +++ b/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1-cxx11.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fdiagnostics-show-option -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s template struct set{}; diff --git a/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1.cpp b/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1.cpp index bb6bb73ec702..e3599db18350 100644 --- a/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1.cpp +++ b/clang/test/CXX/basic/basic.lookup/basic.lookup.classref/p1.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-show-option -verify %s -// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-show-option -verify -std=c++98 %s -// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-show-option -verify -std=c++11 %s +// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++98 %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s // C++98 [basic.lookup.classref]p1: // In a class member access expression (5.2.5), if the . 
or -> token is diff --git a/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-cxx03-extra-copy.cpp b/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-cxx03-extra-copy.cpp index 7a5caef36e73..e3190245d240 100644 --- a/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-cxx03-extra-copy.cpp +++ b/clang/test/CXX/dcl.decl/dcl.init/dcl.init.ref/p5-cxx03-extra-copy.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -std=c++03 -fdiagnostics-show-option -Wbind-to-temporary-copy -verify %s +// RUN: %clang_cc1 -fsyntax-only -std=c++03 -Wbind-to-temporary-copy -verify %s // C++03 requires that we check for a copy constructor when binding a // reference to a temporary, since we are allowed to make a copy, Even diff --git a/clang/test/CodeGen/builtins-systemz-zvector.c b/clang/test/CodeGen/builtins-systemz-zvector.c index 6cba71098792..da0e720c9fae 100644 --- a/clang/test/CodeGen/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/builtins-systemz-zvector.c @@ -3665,31 +3665,31 @@ void test_integer(void) { // CHECK-ASM: vsumqg idx = vec_test_mask(vsc, vuc); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vuc, vuc); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vss, vus); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vus, vus); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vsi, vui); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vui, vui); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vsl, vul); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vul, vul); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vd, vul); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm } diff --git a/clang/test/CodeGen/builtins-systemz-zvector2.c b/clang/test/CodeGen/builtins-systemz-zvector2.c index 1880fed64dbc..a4f791e6019b 100644 --- a/clang/test/CodeGen/builtins-systemz-zvector2.c +++ b/clang/test/CodeGen/builtins-systemz-zvector2.c @@ -654,10 +654,10 @@ void test_integer(void) { // CHECK-ASM: vsrlb idx = vec_test_mask(vf, vui); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm idx = vec_test_mask(vd, vul); - // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: call i32 @llvm.s390.vtm(<16 
x i8> %{{.*}}, <16 x i8> %{{.*}}) // CHECK-ASM: vtm vuc = vec_msum_u128(vul, vul, vuc, 0); diff --git a/clang/test/CodeGen/movbe-builtins.c b/clang/test/CodeGen/movbe-builtins.c index 15f49b84ec67..342f66391388 100644 --- a/clang/test/CodeGen/movbe-builtins.c +++ b/clang/test/CodeGen/movbe-builtins.c @@ -7,7 +7,7 @@ short test_loadbe_i16(const short *P) { // CHECK-LABEL: @test_loadbe_i16 // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1 - // CHECK: call signext i16 @llvm.bswap.i16(i16 [[LOAD]]) + // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]]) return _loadbe_i16(P); } diff --git a/clang/test/CodeGen/rot-intrinsics.c b/clang/test/CodeGen/rot-intrinsics.c index 7b1ffb6ae3a6..dcdc54c4585a 100644 --- a/clang/test/CodeGen/rot-intrinsics.c +++ b/clang/test/CodeGen/rot-intrinsics.c @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG #include diff --git a/clang/test/CodeGen/xray-global-init.cpp b/clang/test/CodeGen/xray-global-init.cpp new file mode 100644 index 000000000000..588be8a45a50 --- /dev/null +++ b/clang/test/CodeGen/xray-global-init.cpp @@ -0,0 +1,13 
@@ +// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -fxray-instrument -fxray-instruction-threshold=1 %s -o - \ +// RUN: | FileCheck %s + +struct A { + A(); + ~A(); +}; + +A a; + +// Check that the xray-instruction-threshold was applied +// CHECK: define internal void @_GLOBAL__sub_I_xray_global_init.cpp() [[NUX:#[0-9]+]] section ".text.startup" { +// CHECK: attributes [[NUX]] = { noinline nounwind {{.*}}"xray-instruction-threshold"="1"{{.*}} } diff --git a/clang/test/CodeGenCXX/debug-info-template-parameter.cpp b/clang/test/CodeGenCXX/debug-info-template-parameter.cpp index 95e7a187fe10..c38c535d8b06 100644 --- a/clang/test/CodeGenCXX/debug-info-template-parameter.cpp +++ b/clang/test/CodeGenCXX/debug-info-template-parameter.cpp @@ -8,22 +8,24 @@ // CHECK: DILocalVariable(name: "f1", {{.*}}, type: ![[TEMPLATE_TYPE:[0-9]+]] // CHECK: [[TEMPLATE_TYPE]] = {{.*}}!DICompositeType({{.*}}, templateParams: ![[F1_TYPE:[0-9]+]] -// CHECK: [[F1_TYPE]] = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]]} +// CHECK: [[F1_TYPE]] = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]], ![[THIRD:[0-9]+]]} // CHECK: [[FIRST]] = !DITemplateTypeParameter(name: "T", type: !{{[0-9]*}}) // CHECK: [[SECOND]] = !DITemplateValueParameter(name: "i", type: !{{[0-9]*}}, value: i32 6) +// CHECK: [[THIRD]] = !DITemplateValueParameter(name: "b", type: !{{[0-9]*}}, value: i8 0) // CHECK: DILocalVariable(name: "f2", {{.*}}, type: ![[TEMPLATE_TYPE:[0-9]+]] // CHECK: [[TEMPLATE_TYPE]] = {{.*}}!DICompositeType({{.*}}, templateParams: ![[F2_TYPE:[0-9]+]] -// CHECK: [[F2_TYPE]] = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]]} +// CHECK: [[F2_TYPE]] = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]], ![[THIRD:[0-9]+]]} // CHECK: [[FIRST]] = !DITemplateTypeParameter(name: "T", type: !{{[0-9]*}}, defaulted: true) // CHECK: [[SECOND]] = !DITemplateValueParameter(name: "i", type: !{{[0-9]*}}, defaulted: true, value: i32 3) +// CHECK: [[THIRD]] = !DITemplateValueParameter(name: "b", type: !{{[0-9]*}}, defaulted: true, value: i8 1) -template +template class foo { }; int main() { - foo f1; + foo f1; foo<> f2; return 0; } diff --git a/clang/test/CodeGenObjC/debug-info-class-extension.m b/clang/test/CodeGenObjC/debug-info-class-extension.m index a27810cce743..db654e6a60a5 100644 --- a/clang/test/CodeGenObjC/debug-info-class-extension.m +++ b/clang/test/CodeGenObjC/debug-info-class-extension.m @@ -1,5 +1,5 @@ // FIXME: Check IR rather than asm, then triple is not needed. -// RUN: %clang_cc1 -triple %itanium_abi_triple -masm-verbose -S -debug-info-kind=limited %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited %s -o - | FileCheck %s // CHECK: AT_APPLE_objc_complete_type diff --git a/clang/test/CodeGenObjC/debug-info-class-extension2.m b/clang/test/CodeGenObjC/debug-info-class-extension2.m index d4750c120f60..ea7865b4ac5b 100644 --- a/clang/test/CodeGenObjC/debug-info-class-extension2.m +++ b/clang/test/CodeGenObjC/debug-info-class-extension2.m @@ -1,5 +1,5 @@ // FIXME: Check IR rather than asm, then triple is not needed. 
-// RUN: %clang_cc1 -triple %itanium_abi_triple -masm-verbose -S -debug-info-kind=limited %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited %s -o - | FileCheck %s // CHECK: AT_APPLE_objc_complete_type @interface Foo {} @end diff --git a/clang/test/CodeGenObjC/debug-info-class-extension3.m b/clang/test/CodeGenObjC/debug-info-class-extension3.m index a9cf6f6a5c59..f81445b47a21 100644 --- a/clang/test/CodeGenObjC/debug-info-class-extension3.m +++ b/clang/test/CodeGenObjC/debug-info-class-extension3.m @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -masm-verbose -S -debug-info-kind=limited %s -o - | FileCheck %s +// RUN: %clang_cc1 -S -debug-info-kind=limited %s -o - | FileCheck %s // CHECK-NOT: AT_APPLE_objc_complete_type diff --git a/clang/test/CodeGenObjC/debug-info-property.m b/clang/test/CodeGenObjC/debug-info-property.m index 9b471be23dbd..ca013b24be42 100644 --- a/clang/test/CodeGenObjC/debug-info-property.m +++ b/clang/test/CodeGenObjC/debug-info-property.m @@ -1,5 +1,5 @@ // FIXME: Check IR rather than asm, then triple is not needed. -// RUN: %clang_cc1 -triple %itanium_abi_triple -masm-verbose -S -debug-info-kind=limited %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited %s -o - | FileCheck %s // CHECK: AT_APPLE_property_name // CHECK: AT_APPLE_property_attribute diff --git a/clang/test/CodeGenObjC/debug-info-property2.m b/clang/test/CodeGenObjC/debug-info-property2.m index 6a15922c932c..7e0a5e9f954b 100644 --- a/clang/test/CodeGenObjC/debug-info-property2.m +++ b/clang/test/CodeGenObjC/debug-info-property2.m @@ -1,5 +1,5 @@ // FIXME: Check IR rather than asm, then triple is not needed. -// RUN: %clang_cc1 -triple %itanium_abi_triple -masm-verbose -S -debug-info-kind=limited %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited %s -o - | FileCheck %s // CHECK: AT_APPLE_property_name @interface C { diff --git a/clang/test/CodeGenObjC/debug-info-property4.m b/clang/test/CodeGenObjC/debug-info-property4.m index f862c85b344d..1f489f2f6b63 100644 --- a/clang/test/CodeGenObjC/debug-info-property4.m +++ b/clang/test/CodeGenObjC/debug-info-property4.m @@ -1,5 +1,5 @@ // FIXME: Check IR rather than asm, then triple is not needed. -// RUN: %clang_cc1 -triple %itanium_abi_triple -masm-verbose -S -debug-info-kind=limited %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited %s -o - | FileCheck %s // CHECK: AT_APPLE_property_name // CHECK-NOT: AT_APPLE_property_getter diff --git a/clang/test/CodeGenObjC/debug-info-property5.m b/clang/test/CodeGenObjC/debug-info-property5.m index 191da9c16fcc..8b70f1ff2082 100644 --- a/clang/test/CodeGenObjC/debug-info-property5.m +++ b/clang/test/CodeGenObjC/debug-info-property5.m @@ -1,5 +1,5 @@ // FIXME: Check IR rather than asm, then triple is not needed. -// RUN: %clang_cc1 -triple %itanium_abi_triple -masm-verbose -S -debug-info-kind=limited %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited %s -o - | FileCheck %s // CHECK: AT_APPLE_property_name // CHECK: AT_APPLE_property_getter diff --git a/clang/test/CodeGenObjC/property-dbg.m b/clang/test/CodeGenObjC/property-dbg.m index fb70747f5db8..f15213131ccc 100644 --- a/clang/test/CodeGenObjC/property-dbg.m +++ b/clang/test/CodeGenObjC/property-dbg.m @@ -1,5 +1,5 @@ // FIXME: Check IR rather than asm, then triple is not needed. 
-// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited -masm-verbose -x objective-c < %s | grep DW_AT_name +// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited -x objective-c < %s | grep DW_AT_name @interface Foo { int i; } diff --git a/clang/test/Driver/clang-translation.c b/clang/test/Driver/clang-translation.c index 79d8f6f18ab0..2f02970a2a8e 100644 --- a/clang/test/Driver/clang-translation.c +++ b/clang/test/Driver/clang-translation.c @@ -4,7 +4,6 @@ // I386: "-disable-free" // I386: "-mrelocation-model" "static" // I386: "-mframe-pointer=all" -// I386: "-masm-verbose" // I386: "-munwind-tables" // I386: "-Os" // I386: "-fvisibility" diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu index 74f4bbc1585e..5b1046b0cb12 100644 --- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -7,6 +7,16 @@ // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// Test explicit argument. +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s + +// Test the default changing with no argument based on the subtarget. +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s + // CPUFTZ-NOT: -fdenormal-fp-math // FTZ-NOT: -fdenormal-fp-math-f32= diff --git a/clang/test/Driver/darwin-objc-options.m b/clang/test/Driver/darwin-objc-options.m index 3e21fb38c0a9..60827f2937ed 100644 --- a/clang/test/Driver/darwin-objc-options.m +++ b/clang/test/Driver/darwin-objc-options.m @@ -31,5 +31,12 @@ // CHECK-CHECK-I386_IOS-NOT: -fobjc-dispatch-method // CHECK-CHECK-I386_IOS: darwin-objc-options +/// Don't add -fobjc-runtime for non-ObjC input. +// RUN: touch %t.c +// RUN: %clang -target x86_64-apple-darwin -x objective-c -S -### %t.c 2>&1 | FileCheck --check-prefix=F %s +// RUN: %clang -target x86_64-apple-darwin -S -### %t.c 2>&1 | FileCheck --check-prefix=NO_F %s +// F: -fobjc-runtime= +// NO_F-NOT: -fobjc-runtime= + // Don't crash with an unexpected target triple. 
// RUN: %clang -target i386-apple-ios7 -S -### %s diff --git a/clang/test/Driver/fdiagnostics-show-option.c b/clang/test/Driver/fdiagnostics-show-option.c new file mode 100644 index 000000000000..a574503cacee --- /dev/null +++ b/clang/test/Driver/fdiagnostics-show-option.c @@ -0,0 +1,7 @@ +/// -fdiagnostics-show-option is the default +// RUN: %clang -### -c %s 2>&1 | FileCheck --check-prefix=ENABLED %s +// ENABLED-NOT: "-fno-diagnostics-show-option" + +// RUN: %clang -### -c %s -fdiagnostics-show-option -fno-diagnostics-show-option 2>&1 | \ +// RUN: FileCheck --check-prefix=DISABLED %s +// DISABLED: "-fno-diagnostics-show-option" diff --git a/clang/test/Driver/fmessage-length.c b/clang/test/Driver/fmessage-length.c new file mode 100644 index 000000000000..638add05b2e5 --- /dev/null +++ b/clang/test/Driver/fmessage-length.c @@ -0,0 +1,9 @@ +// RUN: %clang -### -c %s -fmessage-length=80 2>&1 | FileCheck %s +// CHECK: "-fmessage-length=80" + +/// Omit -fmessage-length=0 to simplify common CC1 command lines. +// RUN: %clang -### -c %s -fmessage-length=0 2>&1 | FileCheck --check-prefix=ZERO %s +// ZERO-NOT: "-fmessage-length=0" + +// RUN: %clang -### -c %s -fmessage-length=nan 2>&1 | FileCheck --check-prefix=ERR %s +// ERR: error: invalid argument 'nan' to -fmessage-length= diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip index b79cb70cbe68..cb1747c2d798 100644 --- a/clang/test/Driver/hip-device-libs.hip +++ b/clang/test/Driver/hip-device-libs.hip @@ -2,23 +2,94 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// Test flush-denormals-to-zero enabled uses oclc_daz_opt_on +// Test whether oclc_daz_opt_on or oclc_daz_opt_off is linked, depending on +// the expected denormal mode. +// Test subtarget with flushing on by default. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,FLUSHD + + +// Test subtarget with flushing off by default. // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,NOFLUSHD + + +// Test explicit flag, opposite of target default. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ // RUN: -fcuda-flush-denormals-to-zero \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=COM,FLUSHD -// Test flush-denormals-to-zero disabled uses oclc_daz_opt_off +// Test explicit flag, opposite of target default. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fno-cuda-flush-denormals-to-zero \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,NOFLUSHD + + +// Test explicit flag, same as target default. // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fno-cuda-flush-denormals-to-zero \ // RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=COM,NOFLUSHD + +// Test explicit flag, same as target default.
+// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fcuda-flush-denormals-to-zero \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,FLUSHD + + +// Test that the last flag wins, not flushing. +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,NOFLUSHD + + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,NOFLUSHD + + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,FLUSHD + + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 \ +// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=COM,FLUSHD + + // Test environment variable HIP_DEVICE_LIB_PATH // RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/hip_dev_lib \ @@ -33,4 +104,3 @@ // COM-SAME: "-mlink-builtin-bitcode" "{{.*}}ockl.amdgcn.bc" // FLUSHD-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_on.amdgcn.bc" // NOFLUSHD-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.amdgcn.bc" - diff --git a/clang/test/Driver/integrated-as.c b/clang/test/Driver/integrated-as.c index df5cf1a17ecc..170515579b1a 100644 --- a/clang/test/Driver/integrated-as.c +++ b/clang/test/Driver/integrated-as.c @@ -12,6 +12,7 @@ // NOFIAS-NOT: cc1as // NOFIAS: -cc1 +// NOFIAS: "-fno-verbose-asm" // NOFIAS: -no-integrated-as // RUN: %clang -target arm-linux-androideabi -### \ diff --git a/clang/test/Driver/rewrite-legacy-objc.m b/clang/test/Driver/rewrite-legacy-objc.m index dc92dd4bf107..fb7df4b97c06 100644 --- a/clang/test/Driver/rewrite-legacy-objc.m +++ b/clang/test/Driver/rewrite-legacy-objc.m @@ -3,11 +3,11 @@ // TEST0: clang{{.*}}" "-cc1" // TEST0: "-rewrite-objc" // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead.
-// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST0: "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" // TEST0: rewrite-legacy-objc.m" // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.9.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \ // RUN: FileCheck -check-prefix=TEST1 %s // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.6.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \ // RUN: FileCheck -check-prefix=TEST2 %s -// TEST1: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" -// TEST2: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST1: "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" +// TEST2: "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" diff --git a/clang/test/Driver/rewrite-objc.m b/clang/test/Driver/rewrite-objc.m index b04062992b7f..6073dcdfdafe 100644 --- a/clang/test/Driver/rewrite-objc.m +++ b/clang/test/Driver/rewrite-objc.m @@ -3,4 +3,4 @@ // TEST0: clang{{.*}}" "-cc1" // TEST0: "-rewrite-objc" // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead. 
-// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST0: "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" diff --git a/clang/test/Driver/show-option-names.c b/clang/test/Driver/show-option-names.c deleted file mode 100644 index 9843a4371f14..000000000000 --- a/clang/test/Driver/show-option-names.c +++ /dev/null @@ -1,7 +0,0 @@ -// REQUIRES: x86-registered-target - -// RUN: %clang -target x86_64-apple-darwin -fsyntax-only -isysroot /FOO %s 2>&1 | FileCheck --check-prefix=CHECK-SHOW-OPTION-NAMES %s -// CHECK-SHOW-OPTION-NAMES: warning: no such sysroot directory: '{{([A-Za-z]:.*)?}}/FOO' [-Wmissing-sysroot] - -// RUN: %clang -target x86_64-apple-darwin -fsyntax-only -fno-diagnostics-show-option -isysroot /FOO %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SHOW-OPTION-NAMES %s -// CHECK-NO-SHOW-OPTION-NAMES: warning: no such sysroot directory: '{{([A-Za-z]:.*)?}}/FOO'{{$}} diff --git a/clang/test/Frontend/diagnostics-option-names.c b/clang/test/Frontend/diagnostics-option-names.c index ed0d2ed8ef9e..71455be0a75b 100644 --- a/clang/test/Frontend/diagnostics-option-names.c +++ b/clang/test/Frontend/diagnostics-option-names.c @@ -1,4 +1,4 @@ -// RUN: not %clang_cc1 -fdiagnostics-show-option -Werror -Weverything %s 2> %t +// RUN: not %clang_cc1 -Werror -Weverything %s 2> %t // RUN: FileCheck < %t %s int f0(int, unsigned); diff --git a/clang/test/Frontend/source-col-map.c b/clang/test/Frontend/source-col-map.c index 1c8078998c56..b257261b8b2b 100644 --- a/clang/test/Frontend/source-col-map.c +++ b/clang/test/Frontend/source-col-map.c @@ -1,4 +1,4 @@ -// RUN: not %clang_cc1 -fsyntax-only -fmessage-length 75 -o /dev/null -x c < %s 2>&1 | FileCheck %s -strict-whitespace +// RUN: not %clang_cc1 -fsyntax-only -fmessage-length=75 -o /dev/null -x c < %s 2>&1 | FileCheck %s -strict-whitespace // REQUIRES: utf8-capable-terminal // Test case for the text diagnostics source column conversion crash. 
diff --git a/clang/test/Index/pch-with-errors.c b/clang/test/Index/pch-with-errors.c index 5c94a8a8e4d3..e8711c8e26a9 100644 --- a/clang/test/Index/pch-with-errors.c +++ b/clang/test/Index/pch-with-errors.c @@ -42,3 +42,6 @@ void foo(void) { // RUN: not c-index-test -write-pch %t.pch foobar.c 2>&1 | FileCheck -check-prefix=NONEXISTENT %s // NONEXISTENT: Unable to load translation unit + +// RUN: %clang -x c-header %s -o %t-clang.h.pch -Xclang -detailed-preprocessing-record -Xclang -fallow-pch-with-compiler-errors +// RUN: c-index-test -index-file %s -include %t-clang.h -Xclang -detailed-preprocessing-record | FileCheck -check-prefix=CHECK-INDEX %s diff --git a/clang/test/InterfaceStubs/bad-format.cpp b/clang/test/InterfaceStubs/bad-format.cpp index 4d51ac867eb2..1289067a365a 100644 --- a/clang/test/InterfaceStubs/bad-format.cpp +++ b/clang/test/InterfaceStubs/bad-format.cpp @@ -7,6 +7,9 @@ // RUN: not %clang -emit-interface-stubs -interface-stub-version=experimental-yaml-elf-v1 %s 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-YAML-DEPRECATED %s +// RUN: not %clang -emit-interface-stubs -interface-stub-version=experimental-ifs-v1 %s 2>&1 | \ +// RUN: FileCheck -check-prefix=CHECK-V1-DEPRECATED %s + // RUN: not %clang -emit-interface-stubs -interface-stub-version=bad-format %s 2>&1 | \ // RUN: FileCheck %s @@ -21,16 +24,22 @@ // CHECK: error: invalid value // CHECK: 'Invalid interface stub format: bad-format.' in 'Must specify a // CHECK: valid interface stub format type, ie: -// CHECK: -interface-stub-version=experimental-ifs-v1' +// CHECK: -interface-stub-version=experimental-ifs-v2' // CHECK-TAPI-DEPRECATED: error: invalid value // CHECK-TAPI-DEPRECATED: 'Invalid interface stub format: // CHECK-TAPI-DEPRECATED: experimental-tapi-elf-v1 is deprecated.' in 'Must // CHECK-TAPI-DEPRECATED: specify a valid interface stub format type, ie: -// CHECK-TAPI-DEPRECATED: -interface-stub-version=experimental-ifs-v1' +// CHECK-TAPI-DEPRECATED: -interface-stub-version=experimental-ifs-v2' // CHECK-YAML-DEPRECATED: error: invalid value // CHECK-YAML-DEPRECATED: 'Invalid interface stub format: // CHECK-YAML-DEPRECATED: experimental-yaml-elf-v1 is deprecated.' in 'Must // CHECK-YAML-DEPRECATED: specify a valid interface stub format type, ie: -// CHECK-YAML-DEPRECATED: -interface-stub-version=experimental-ifs-v1' +// CHECK-YAML-DEPRECATED: -interface-stub-version=experimental-ifs-v2' + +// CHECK-V1-DEPRECATED: error: invalid value +// CHECK-V1-DEPRECATED: 'Invalid interface stub format: +// CHECK-V1-DEPRECATED: experimental-ifs-v1 is deprecated.' 
in 'Must +// CHECK-V1-DEPRECATED: specify a valid interface stub format type, ie: +// CHECK-V1-DEPRECATED: -interface-stub-version=experimental-ifs-v2' diff --git a/clang/test/InterfaceStubs/blocks.c b/clang/test/InterfaceStubs/blocks.c index 927f2bf28869..8e2a01159aab 100644 --- a/clang/test/InterfaceStubs/blocks.c +++ b/clang/test/InterfaceStubs/blocks.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -emit-interface-stubs -fblocks -o - %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/class-template-partial-specialization.cpp b/clang/test/InterfaceStubs/class-template-partial-specialization.cpp index 4c0edaa2dd8f..b6580861de8b 100644 --- a/clang/test/InterfaceStubs/class-template-partial-specialization.cpp +++ b/clang/test/InterfaceStubs/class-template-partial-specialization.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/conflict-type.ifs b/clang/test/InterfaceStubs/conflict-type.ifs index aaa04775e317..cc6191900a30 100644 --- a/clang/test/InterfaceStubs/conflict-type.ifs +++ b/clang/test/InterfaceStubs/conflict-type.ifs @@ -7,10 +7,10 @@ # CHECK-IFS-NEXT: Filename: # CHECK-IFS-NEXT: Type Values: Object Func ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-linux-gnu ObjectFileFormat: ELF Symbols: - a: { Type: Object, Size: 1 } + - { Name: a, Type: Object, Size: 1 } ... diff --git a/clang/test/InterfaceStubs/constructor-using-shadow.cpp b/clang/test/InterfaceStubs/constructor-using-shadow.cpp index d4b85ac73e56..e806cc323ee7 100644 --- a/clang/test/InterfaceStubs/constructor-using-shadow.cpp +++ b/clang/test/InterfaceStubs/constructor-using-shadow.cpp @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: // CHECK-NEXT: ... 
- // ConstructorUsingShadowDecl +// ConstructorUsingShadowDecl struct Base { Base(int); }; struct Derived : public Base { using Base::Base; }; diff --git a/clang/test/InterfaceStubs/cxx-conversion.cpp b/clang/test/InterfaceStubs/cxx-conversion.cpp index 96425a42b6fc..f9de07d17850 100644 --- a/clang/test/InterfaceStubs/cxx-conversion.cpp +++ b/clang/test/InterfaceStubs/cxx-conversion.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/cxxdeduction-guide.cpp b/clang/test/InterfaceStubs/cxxdeduction-guide.cpp index f09b9d929ca3..4d9f24bae5b3 100644 --- a/clang/test/InterfaceStubs/cxxdeduction-guide.cpp +++ b/clang/test/InterfaceStubs/cxxdeduction-guide.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs -std=c++17 %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/driver-test3.c b/clang/test/InterfaceStubs/driver-test3.c index bccd1c9bccd4..a3f3966dbe8f 100644 --- a/clang/test/InterfaceStubs/driver-test3.c +++ b/clang/test/InterfaceStubs/driver-test3.c @@ -8,12 +8,12 @@ // CHECK-OBJ: bar -// CHECK-IFS: --- !experimental-ifs-v1 +// CHECK-IFS: --- !experimental-ifs-v2 // CHECK-IFS-NEXT: IfsVersion: // CHECK-IFS-NEXT: Triple: // CHECK-IFS-NEXT: ObjectFileFormat: // CHECK-IFS-NEXT: Symbols: -// CHECK-IFS-NEXT: "bar" : { Type: Func } +// CHECK-IFS-NEXT: - { Name: "bar", Type: Func } // CHECK-IFS-NEXT: ... int bar(int a) { return a; } diff --git a/clang/test/InterfaceStubs/empty.c b/clang/test/InterfaceStubs/empty.c new file mode 100644 index 000000000000..c68c124e513e --- /dev/null +++ b/clang/test/InterfaceStubs/empty.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s + +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 +// CHECK-NEXT: Triple: +// CHECK-NEXT: ObjectFileFormat: +// CHECK-NEXT: Symbols: +// CHECK-NEXT: ... diff --git a/clang/test/InterfaceStubs/func.ifs b/clang/test/InterfaceStubs/func.ifs index d115523bfda4..9de5213de9d6 100644 --- a/clang/test/InterfaceStubs/func.ifs +++ b/clang/test/InterfaceStubs/func.ifs @@ -7,13 +7,13 @@ # RUN: %clang -emit-interface-stubs -o - %s %s -emit-merged-ifs | \ # RUN: FileCheck %s --check-prefixes=CHECK-MERGE-IFS -# CHECK-IFS: --- !experimental-ifs-v1 -# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS: --- !experimental-ifs-v2 +# CHECK-IFS-NEXT: IfsVersion: 2.0 # CHECK-IFS-NEXT: Triple: x86_64-linux-gnu # CHECK-IFS-NEXT: ObjectFileFormat: ELF # CHECK-IFS-NEXT: Symbols: -# CHECK-IFS-DAG: a: { Type: Func } -# CHECK-IFS-DAG: b: { Type: Object, Size: 4 } +# CHECK-IFS-DAG: - { Name: a, Type: Func } +# CHECK-IFS-DAG: - { Name: b, Type: Object, Size: 4 } # CHECK-IFS: ... # CHECK-ELF: ELF Header: @@ -23,18 +23,18 @@ # CHECK-ELF: OBJECT GLOBAL DEFAULT 1 b # Here we are testing to see if two identical symbols will merge. 
-# CHECK-MERGE-IFS: --- !experimental-ifs-v1 -# CHECK-MERGE-IFS-NEXT: IfsVersion: 1.0 +# CHECK-MERGE-IFS: --- !experimental-ifs-v2 +# CHECK-MERGE-IFS-NEXT: IfsVersion: 2.0 # CHECK-MERGE-IFS-NEXT: Triple: x86_64-linux-gnu # CHECK-MERGE-IFS-NEXT: ObjectFileFormat: ELF # CHECK-MERGE-IFS-NEXT: Symbols: -# CHECK-MERGE-IFS-NEXT: a: { Type: Func } +# CHECK-MERGE-IFS-NEXT: - { Name: a, Type: Func } # CHECK-MERGE-IFS-NEXT: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-linux-gnu ObjectFileFormat: ELF Symbols: - a: { Type: Func } + - { Name: a, Type: Func } ... diff --git a/clang/test/InterfaceStubs/hidden-class-inheritance.cpp b/clang/test/InterfaceStubs/hidden-class-inheritance.cpp index 19ba579608ec..2219fd5b2e8a 100644 --- a/clang/test/InterfaceStubs/hidden-class-inheritance.cpp +++ b/clang/test/InterfaceStubs/hidden-class-inheritance.cpp @@ -14,7 +14,7 @@ // RUN: -DPARENT_METHOD_VISIBILITY="" -DCHILD_METHOD_VISIBILITY="" %s | \ // RUN: FileCheck -check-prefix=CHECK-HP %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 \ +// RUN: -interface-stub-version=experimental-ifs-v2 \ // RUN: -DPARENT_CLASS_VISIBILITY=HIDDEN -DCHILD_CLASS_VISIBILITY="" \ // RUN: -DPARENT_METHOD_VISIBILITY="" -DCHILD_METHOD_VISIBILITY="" %s | \ // RUN: FileCheck -check-prefix=CHECK-HP2 %s diff --git a/clang/test/InterfaceStubs/indirect-field-decl.cpp b/clang/test/InterfaceStubs/indirect-field-decl.cpp index d0e5fd26e4b7..2c30b0ee4005 100644 --- a/clang/test/InterfaceStubs/indirect-field-decl.cpp +++ b/clang/test/InterfaceStubs/indirect-field-decl.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/inline.c b/clang/test/InterfaceStubs/inline.c index 0b0ac83726ad..1dec4ae677d7 100644 --- a/clang/test/InterfaceStubs/inline.c +++ b/clang/test/InterfaceStubs/inline.c @@ -55,8 +55,8 @@ INLINE int foo() { // RUN: -c -std=gnu89 -xc %s | llvm-nm - 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-SYMBOLS %s -// CHECK-TAPI-DAG: foo" : { Type: Func } -// CHECK-TAPI-DAG: foo.var" : { Type: Object, Size: 4 } +// CHECK-TAPI-DAG: foo", Type: Func } +// CHECK-TAPI-DAG: foo.var", Type: Object, Size: 4 } // CHECK-SYMBOLS-DAG: foo // CHECK-SYMBOLS-DAG: foo.var #include "inline.h" diff --git a/clang/test/InterfaceStubs/lambda.cpp b/clang/test/InterfaceStubs/lambda.cpp index e892f1eee11c..a167f6556b94 100644 --- a/clang/test/InterfaceStubs/lambda.cpp +++ b/clang/test/InterfaceStubs/lambda.cpp @@ -1,11 +1,11 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-interface-stubs -o - %s \ // RUN: | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: -// CHECK-NEXT: f" : { Type: Object, Size: 1 } +// CHECK-NEXT: f", Type: Object, Size: 1 } // CHECK-NEXT: ... 
auto f = [](void* data) { int i; }; diff --git a/clang/test/InterfaceStubs/namespace-alias.cpp b/clang/test/InterfaceStubs/namespace-alias.cpp index 6a7f27c9b7b0..a4e05f904701 100644 --- a/clang/test/InterfaceStubs/namespace-alias.cpp +++ b/clang/test/InterfaceStubs/namespace-alias.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/namespace.cpp b/clang/test/InterfaceStubs/namespace.cpp index 1c62346d22fc..ad4db24ff7de 100644 --- a/clang/test/InterfaceStubs/namespace.cpp +++ b/clang/test/InterfaceStubs/namespace.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/non-type-template-parm-decl.cpp b/clang/test/InterfaceStubs/non-type-template-parm-decl.cpp index 51176ac0ba0b..6390099dee5f 100644 --- a/clang/test/InterfaceStubs/non-type-template-parm-decl.cpp +++ b/clang/test/InterfaceStubs/non-type-template-parm-decl.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/object.c b/clang/test/InterfaceStubs/object.c index d6e28c5f884a..45e2d38ba3e9 100644 --- a/clang/test/InterfaceStubs/object.c +++ b/clang/test/InterfaceStubs/object.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fvisibility default -o - -emit-interface-stubs %s | FileCheck -check-prefix=CHECK-TAPI %s // RUN: %clang -fvisibility=default -c -o - %s | llvm-nm - 2>&1 | FileCheck -check-prefix=CHECK-SYMBOLS %s -// CHECK-TAPI: data" : { Type: Object, Size: 4 } +// CHECK-TAPI: data", Type: Object, Size: 4 } // CHECK-SYMBOLS: data int data = 42; diff --git a/clang/test/InterfaceStubs/object.ifs b/clang/test/InterfaceStubs/object.ifs index 7dc1134bac93..3afdf4e65eef 100644 --- a/clang/test/InterfaceStubs/object.ifs +++ b/clang/test/InterfaceStubs/object.ifs @@ -4,12 +4,12 @@ # RUN: %clang -emit-interface-stubs -o - %s | llvm-readelf --all | \ # RUN: FileCheck %s --check-prefixes=CHECK-ELF -# CHECK-IFS: --- !experimental-ifs-v1 -# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS: --- !experimental-ifs-v2 +# CHECK-IFS-NEXT: IfsVersion: 2.0 # CHECK-IFS-NEXT: Triple: x86_64-linux-gnu # CHECK-IFS-NEXT: ObjectFileFormat: ELF # CHECK-IFS-NEXT: Symbols: -# CHECK-IFS-NEXT: b: { Type: Object, Size: 4 } +# CHECK-IFS-NEXT: - { Name: b, Type: Object, Size: 4 } # CHECK-IFS-NEXT: ... # CHECK-ELF: ELF Header: @@ -19,10 +19,10 @@ # CHECK-ELF-NOT: FUNC GLOBAL DEFAULT 1 a # CHECK-ELF: OBJECT GLOBAL DEFAULT 1 b ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-linux-gnu ObjectFileFormat: ELF Symbols: - b: { Type: Object, Size: 4 } + - { Name: b, Type: Object, Size: 4 } ... 
diff --git a/clang/test/InterfaceStubs/ppc.cpp b/clang/test/InterfaceStubs/ppc.cpp index 9a91697d9506..8b7a276bb054 100644 --- a/clang/test/InterfaceStubs/ppc.cpp +++ b/clang/test/InterfaceStubs/ppc.cpp @@ -4,11 +4,11 @@ // RUN: -emit-interface-stubs -emit-merged-ifs -S | \ // RUN: FileCheck -check-prefix=CHECK-IFS %s - // CHECK-IFS: --- !experimental-ifs-v1 - // CHECK-IFS: IfsVersion: 1.0 - // CHECK-IFS: Triple: powerpc64le - // CHECK-IFS: Symbols: - // CHECK-IFS: _Z8helloPPCv: { Type: Func } - // CHECK-IFS: ... +// CHECK-IFS: --- !experimental-ifs-v2 +// CHECK-IFS: IfsVersion: 2.0 +// CHECK-IFS: Triple: powerpc64le +// CHECK-IFS: Symbols: +// CHECK-IFS: - { Name: _Z8helloPPCv, Type: Func } +// CHECK-IFS: ... int helloPPC(); diff --git a/clang/test/InterfaceStubs/template-constexpr.cpp b/clang/test/InterfaceStubs/template-constexpr.cpp index c4c7afa42f1e..f59a55b2bb45 100644 --- a/clang/test/InterfaceStubs/template-constexpr.cpp +++ b/clang/test/InterfaceStubs/template-constexpr.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/template-namespace-function.cpp b/clang/test/InterfaceStubs/template-namespace-function.cpp index 47788d4a3e0a..68f017c4d5ec 100644 --- a/clang/test/InterfaceStubs/template-namespace-function.cpp +++ b/clang/test/InterfaceStubs/template-namespace-function.cpp @@ -6,10 +6,10 @@ // RUN: FileCheck -check-prefix=CHECK-SYMBOLS %s // CHECK: Symbols: -// CHECK-DAG: "_ZN3qux3barEii" : { Type: Func } -// CHECK-DAG: "_ZN3baz3addIiEET_S1_S1_" : { Type: Func } -// CHECK-DAG: "_Z4fbarff" : { Type: Func } -// CHECK-DAG: "_ZN3baz3addIfEET_S1_S1_" : { Type: Func } +// CHECK-DAG: - { Name: "_ZN3qux3barEii", Type: Func } +// CHECK-DAG: - { Name: "_ZN3baz3addIiEET_S1_S1_", Type: Func } +// CHECK-DAG: - { Name: "_Z4fbarff", Type: Func } +// CHECK-DAG: - { Name: "_ZN3baz3addIfEET_S1_S1_", Type: Func } // Same symbols just different order. // CHECK-SYMBOLS-DAG: _Z4fbarff diff --git a/clang/test/InterfaceStubs/template-template-parm-decl.cpp b/clang/test/InterfaceStubs/template-template-parm-decl.cpp index 63883536a816..5451ec6178e2 100644 --- a/clang/test/InterfaceStubs/template-template-parm-decl.cpp +++ b/clang/test/InterfaceStubs/template-template-parm-decl.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/trycatch.cpp b/clang/test/InterfaceStubs/trycatch.cpp index 57076a097cb5..dac7806926a5 100644 --- a/clang/test/InterfaceStubs/trycatch.cpp +++ b/clang/test/InterfaceStubs/trycatch.cpp @@ -2,13 +2,12 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -o - -emit-interface-stubs %s | FileCheck %s - -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: x86_64-unknown-linux-gnu // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: -// CHECK-NEXT: "_Z1fv" : { Type: Func } +// CHECK-NEXT: - { Name: "_Z1fv", Type: Func } // CHECK-NEXT: ... 
class C5 {}; diff --git a/clang/test/InterfaceStubs/unresolved-using-typename.cpp b/clang/test/InterfaceStubs/unresolved-using-typename.cpp index e6afc781412a..d4aad84d7211 100644 --- a/clang/test/InterfaceStubs/unresolved-using-typename.cpp +++ b/clang/test/InterfaceStubs/unresolved-using-typename.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/usings.cpp b/clang/test/InterfaceStubs/usings.cpp index 735a040c91dc..2ef83207fcb3 100644 --- a/clang/test/InterfaceStubs/usings.cpp +++ b/clang/test/InterfaceStubs/usings.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: diff --git a/clang/test/InterfaceStubs/var-template-specialization-decl.cpp b/clang/test/InterfaceStubs/var-template-specialization-decl.cpp index bbb5ae888977..9b67dac9865c 100644 --- a/clang/test/InterfaceStubs/var-template-specialization-decl.cpp +++ b/clang/test/InterfaceStubs/var-template-specialization-decl.cpp @@ -1,12 +1,12 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -o - -emit-interface-stubs %s | FileCheck %s -// CHECK: --- !experimental-ifs-v1 -// CHECK-NEXT: IfsVersion: 1.0 +// CHECK: --- !experimental-ifs-v2 +// CHECK-NEXT: IfsVersion: 2.0 // CHECK-NEXT: Triple: x86_64-unknown-linux-gnu // CHECK-NEXT: ObjectFileFormat: ELF // CHECK-NEXT: Symbols: -// CHECK-NEXT: "a" : { Type: Object, Size: 4 } +// CHECK-NEXT: - { Name: "a", Type: Object, Size: 4 } // CHECK-NEXT: ... template struct S9 { diff --git a/clang/test/InterfaceStubs/weak.cpp b/clang/test/InterfaceStubs/weak.cpp index 1581ffa9d5d7..e3c0413b6511 100644 --- a/clang/test/InterfaceStubs/weak.cpp +++ b/clang/test/InterfaceStubs/weak.cpp @@ -1,14 +1,14 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 -triple x86_64-linux-gnu -o - -emit-interface-stubs \ -// RUN: -interface-stub-version=experimental-ifs-v1 %s | \ +// RUN: -interface-stub-version=experimental-ifs-v2 %s | \ // RUN: FileCheck %s // RUN: %clang -target x86_64-unknown-linux-gnu -o - -c %s | llvm-nm - 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-SYMBOLS %s // CHECK: Symbols: -// CHECK-DAG: "_Z8weakFuncv" : { Type: Func, Weak: true } -// CHECK-DAG: "_Z10strongFuncv" : { Type: Func } +// CHECK-DAG: - { Name: "_Z8weakFuncv", Type: Func, Weak: true } +// CHECK-DAG: - { Name: "_Z10strongFuncv", Type: Func } // CHECK-SYMBOLS-DAG: _Z10strongFuncv // CHECK-SYMBOLS-DAG: _Z8weakFuncv diff --git a/clang/test/InterfaceStubs/windows.cpp b/clang/test/InterfaceStubs/windows.cpp index c81c702861e4..73f3ed5d39ac 100644 --- a/clang/test/InterfaceStubs/windows.cpp +++ b/clang/test/InterfaceStubs/windows.cpp @@ -6,11 +6,11 @@ // CHECK-CC1: Symbols: // CHECK-CC1-NEXT: ?helloWindowsMsvc@@YAHXZ - // CHECK-IFS: --- !experimental-ifs-v1 - // CHECK-IFS: IfsVersion: 1.0 - // CHECK-IFS: Triple: - // CHECK-IFS: Symbols: - // CHECK-IFS: ?helloWindowsMsvc@@YAHXZ: { Type: Func } - // CHECK-IFS: ... 
+// CHECK-IFS: --- !experimental-ifs-v2 +// CHECK-IFS: IfsVersion: 2.0 +// CHECK-IFS: Triple: +// CHECK-IFS: Symbols: +// CHECK-IFS: - { Name: '?helloWindowsMsvc@@YAHXZ', Type: Func } +// CHECK-IFS: ... int helloWindowsMsvc(); diff --git a/clang/test/Misc/diag-line-wrapping.cpp b/clang/test/Misc/diag-line-wrapping.cpp index 2bcb03f9781c..9e8cb9b53da5 100644 --- a/clang/test/Misc/diag-line-wrapping.cpp +++ b/clang/test/Misc/diag-line-wrapping.cpp @@ -1,5 +1,5 @@ -// RUN: not %clang_cc1 -fsyntax-only -fmessage-length 60 %s 2>&1 | FileCheck %s -// RUN: not %clang_cc1 -fsyntax-only -fmessage-length 0 %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -fsyntax-only -fmessage-length=60 %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -fsyntax-only -fmessage-length=0 %s 2>&1 | FileCheck %s struct B { void f(); }; struct D1 : B {}; diff --git a/clang/test/Misc/message-length.c b/clang/test/Misc/message-length.c index a6f4f44e6b9c..1e0b4edb7c03 100644 --- a/clang/test/Misc/message-length.c +++ b/clang/test/Misc/message-length.c @@ -1,6 +1,6 @@ -// RUN: not %clang_cc1 -fmessage-length 72 %s 2>&1 | FileCheck -strict-whitespace %s -// RUN: not %clang_cc1 -fmessage-length 1 %s -// RUN: not %clang_cc1 -fmessage-length 8 %s 2>&1 | FileCheck -check-prefix=CHECK-DOT %s +// RUN: not %clang_cc1 -fmessage-length=72 %s 2>&1 | FileCheck -strict-whitespace %s +// RUN: not %clang_cc1 -fmessage-length=1 %s +// RUN: not %clang_cc1 -fmessage-length=8 %s 2>&1 | FileCheck -check-prefix=CHECK-DOT %s // Hack so we can check things better, force the file name and line. # 1 "FILE" 1 diff --git a/clang/test/Misc/show-diag-options.c b/clang/test/Misc/show-diag-options.c index 8f05fbc76b56..4e98d63195f1 100644 --- a/clang/test/Misc/show-diag-options.c +++ b/clang/test/Misc/show-diag-options.c @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -fsyntax-only %s 2>&1 \ +// RUN: %clang_cc1 -fsyntax-only -fno-diagnostics-show-option %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=BASE -// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-show-option %s 2>&1 \ +// RUN: %clang_cc1 -fsyntax-only %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=OPTION -// RUN: not %clang_cc1 -fsyntax-only -fdiagnostics-show-option -Werror %s 2>&1 \ +// RUN: not %clang_cc1 -fsyntax-only -Werror %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=OPTION_ERROR -// RUN: %clang_cc1 -fsyntax-only -std=c89 -pedantic -fdiagnostics-show-option %s 2>&1 \ +// RUN: %clang_cc1 -fsyntax-only -std=c89 -pedantic %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=OPTION_PEDANTIC -// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-show-category id %s 2>&1 \ +// RUN: %clang_cc1 -fsyntax-only -fno-diagnostics-show-option -fdiagnostics-show-category id %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=CATEGORY_ID -// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-show-category name %s 2>&1 \ +// RUN: %clang_cc1 -fsyntax-only -fno-diagnostics-show-option -fdiagnostics-show-category name %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=CATEGORY_NAME -// RUN: not %clang_cc1 -fsyntax-only -fdiagnostics-show-option -fdiagnostics-show-category name -Werror %s 2>&1 \ +// RUN: not %clang_cc1 -fsyntax-only -fdiagnostics-show-category name -Werror %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=OPTION_ERROR_CATEGORY void test(int x, int y) { diff --git a/clang/test/Misc/unnecessary-elipses.cpp b/clang/test/Misc/unnecessary-elipses.cpp index 2ee725869b5c..c8c178c37f6c 100644 --- a/clang/test/Misc/unnecessary-elipses.cpp +++ b/clang/test/Misc/unnecessary-elipses.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -fmessage-length 
80 %s 2>&1 | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -fsyntax-only -fmessage-length=80 %s 2>&1 | FileCheck -strict-whitespace %s int main() { "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; @@ -12,4 +12,4 @@ int main() { "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" ; // CHECK: {{^ ..."xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"...}} -} \ No newline at end of file +} diff --git a/clang/test/Misc/unprintable.c b/clang/test/Misc/unprintable.c index eaa4f34d8028..30e449456630 100644 --- a/clang/test/Misc/unprintable.c +++ b/clang/test/Misc/unprintable.c @@ -1,4 +1,4 @@ -// RUN: not %clang_cc1 %s -fmessage-length 40 2>&1 | FileCheck -strict-whitespace %s +// RUN: not %clang_cc1 %s -fmessage-length=40 2>&1 | FileCheck -strict-whitespace %s int main() { int i; diff --git a/clang/test/Misc/wrong-encoding2.c b/clang/test/Misc/wrong-encoding2.c index 43a0f4e900ed..b60ed7f92b86 100644 --- a/clang/test/Misc/wrong-encoding2.c +++ b/clang/test/Misc/wrong-encoding2.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -fmessage-length 100 %s 2>&1 | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -fsyntax-only -fmessage-length=100 %s 2>&1 | FileCheck -strict-whitespace %s // REQUIRES: asserts int main() { diff --git a/clang/test/OpenMP/depobj_codegen.cpp b/clang/test/OpenMP/depobj_codegen.cpp index 2c7509babc17..e51c607ac55a 100644 --- a/clang/test/OpenMP/depobj_codegen.cpp +++ b/clang/test/OpenMP/depobj_codegen.cpp @@ -21,7 +21,7 @@ void foo() {} template T tmain(T argc) { static T a; - void *argv; + int *argv; #pragma omp depobj(a) depend(in:argv, ([3][*(int*)argv][4])argv) #pragma omp depobj(argc) destroy #pragma omp depobj(argc) update(inout) @@ -99,12 +99,12 @@ int main(int argc, char **argv) { // CHECK: store i64 8, i64* [[SZ_ADDR]], // CHECK: [[FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 2 // CHECK: store i8 1, i8* [[FLAGS_ADDR]], -// CHECK: [[SHAPE_ADDR:%.+]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK: [[SZ1:%.+]] = mul nuw i64 3, %{{.+}} +// CHECK: [[SHAPE_ADDR:%.+]] = load i32*, i32** [[ARGV_ADDR:%.+]], +// CHECK: [[SZ1:%.+]] = mul nuw i64 12, %{{.+}} // CHECK: [[SZ:%.+]] = mul nuw i64 [[SZ1]], 4 // CHECK: [[BASE_ADDR:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DEP_ADDR]], i{{.+}} 0, i{{.+}} 2 // CHECK: [[ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[SHAPE:%.+]] = ptrtoint i8* [[SHAPE_ADDR]] to i64 +// CHECK: [[SHAPE:%.+]] = ptrtoint i32* [[SHAPE_ADDR]] to i64 // CHECK: store i64 [[SHAPE]], i64* [[ADDR]], // CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[BASE_ADDR]], i{{.+}} 0, i{{.+}} 1 // CHECK: store i64 [[SZ]], i64* [[SZ_ADDR]], diff --git a/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp b/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp index faff77e0a43b..c71615d2521f 100644 --- a/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp +++ b/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp @@ -1,5 +1,10 @@ -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -fexceptions -fcxx-exceptions -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda 
-emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fexceptions -fcxx-exceptions -ferror-limit 100 +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown \ +// RUN: -verify=host -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc \ +// RUN: %s -o %t-ppc-host.bc -fexceptions -fcxx-exceptions +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s \ +// RUN: -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - \ +// RUN: -fexceptions -fcxx-exceptions -ferror-limit 100 #ifndef HEADER #define HEADER @@ -81,4 +86,17 @@ int (*B)() = &foobar2; int foobar1() { throw 1; } int foobar2() { throw 1; } // expected-error {{cannot use 'throw' with exceptions disabled}} + +int foobar3(); +int (*C)() = &foobar3; // expected-warning {{declaration is not declared in any declare target region}} + // host-warning@-1 {{declaration is not declared in any declare target region}} +#pragma omp declare target +int (*D)() = C; // expected-note {{used here}} + // host-note@-1 {{used here}} +#pragma omp end declare target +int foobar3() { throw 1; } + +// Check no infinite recursion in deferred diagnostic emitter. +long E = (long)&E; + #endif // HEADER diff --git a/clang/test/OpenMP/target_data_ast_print.cpp b/clang/test/OpenMP/target_data_ast_print.cpp index fa67c1834aa4..fcd6e928655c 100644 --- a/clang/test/OpenMP/target_data_ast_print.cpp +++ b/clang/test/OpenMP/target_data_ast_print.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -139,6 +139,8 @@ int main (int argc, char **argv) { static int a; // CHECK: static int a; +#pragma omp target data map(to: ([argc][3][a])argv) + // CHECK: #pragma omp target data map(to: ([argc][3][a])argv) #pragma omp target data map(to: c) // CHECK: #pragma omp target data map(to: c) a=2; diff --git a/clang/test/OpenMP/target_map_codegen.cpp b/clang/test/OpenMP/target_map_codegen.cpp index b9766e82ce03..ecfe50c01ea6 100644 --- a/clang/test/OpenMP/target_map_codegen.cpp +++ b/clang/test/OpenMP/target_map_codegen.cpp @@ -5353,5 +5353,81 @@ void explicit_maps_single (int ii){ // CK31: define {{.+}}[[CALL00]] // CK31: define {{.+}}[[CALL01]] +#endif +///==========================================================================/// +// 
RUN: %clang_cc1 -DCK32 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK32 --check-prefix CK32-64 +// RUN: %clang_cc1 -DCK32 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK32 --check-prefix CK32-64 +// RUN: %clang_cc1 -DCK32 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK32 --check-prefix CK32-32 +// RUN: %clang_cc1 -DCK32 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK32 --check-prefix CK32-32 + +// RUN: %clang_cc1 -DCK32 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// RUN: %clang_cc1 -DCK32 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// RUN: %clang_cc1 -DCK32 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// RUN: %clang_cc1 -DCK32 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY32 %s +// SIMD-ONLY32-NOT: {{__kmpc|__tgt}} +#ifdef CK32 + +// CK32-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [1 x i64] [i64 33] +// CK32-DAG: [[MTYPE_FROM:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +void array_shaping(float *f, int sa) { + + // CK32-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_TO]]{{.+}}) + // CK32-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK32-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK32-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + + // CK32-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK32-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK32-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + + // CK32-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float** + // CK32-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** + + // CK32-DAG: store float* [[F1:%.+]], float** [[BPC0]], + // CK32-DAG: store float* 
[[F2:%.+]], float** [[PC0]], + // CK32-DAG: store i64 [[SIZE:%.+]], i64* [[S0]], + + // CK32-DAG: [[F1]] = load float*, float** [[F_ADDR:%.+]], + // CK32-DAG: [[F2]] = load float*, float** [[F_ADDR]], + // CK32-64-DAG: [[SIZE]] = mul nuw i64 [[SZ1:%.+]], 4 + // CK32-64-DAG: [[SZ1]] = mul nuw i64 12, %{{.+}} + // CK32-32-DAG: [[SIZE]] = sext i32 [[SZ1:%.+]] to i64 + // CK32-32-DAG: [[SZ1]] = mul nuw i32 [[SZ2:%.+]], 4 + // CK32-32-DAG: [[SZ2]] = mul nuw i32 12, %{{.+}} + #pragma omp target map(to:([3][sa][4])f) + f[0] = 1; + sa = 1; + // CK32-DAG: call i32 @__tgt_target(i64 -1, i8* @{{.+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_FROM]]{{.+}}) + // CK32-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK32-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK32-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + + // CK32-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK32-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK32-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + + // CK32-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float** + // CK32-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** + + // CK32-DAG: store float* [[F1:%.+]], float** [[BPC0]], + // CK32-DAG: store float* [[F2:%.+]], float** [[PC0]], + // CK32-DAG: store i64 [[SIZE:%.+]], i64* [[S0]], + + // CK32-DAG: [[F1]] = load float*, float** [[F_ADDR:%.+]], + // CK32-DAG: [[F2]] = load float*, float** [[F_ADDR]], + // CK32-64-DAG: [[SIZE]] = mul nuw i64 [[SZ1:%.+]], 5 + // CK32-64-DAG: [[SZ1]] = mul nuw i64 4, %{{.+}} + // CK32-32-DAG: [[SIZE]] = sext i32 [[SZ1:%.+]] to i64 + // CK32-32-DAG: [[SZ1]] = mul nuw i32 [[SZ2:%.+]], 5 + // CK32-32-DAG: [[SZ2]] = mul nuw i32 4, %{{.+}} + #pragma omp target map(from: ([sa][5])f) + f[0] = 1; +} + #endif #endif diff --git a/clang/test/OpenMP/target_map_messages.cpp b/clang/test/OpenMP/target_map_messages.cpp index 96932af6a04c..a18590fc85fe 100644 --- a/clang/test/OpenMP/target_map_messages.cpp +++ b/clang/test/OpenMP/target_map_messages.cpp @@ -140,6 +140,8 @@ struct SA { {} #pragma omp target map(close bf: a) // expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}} {} + #pragma omp target map(([b[I]][bf])f) // le45-error {{expected ',' or ']' in lambda capture list}} le45-error {{expected ')'}} le45-note {{to match this '('}} + {} return; } }; @@ -189,203 +191,209 @@ void SAclient(int arg) { SD u; SC r(p),t(p); - #pragma omp target map(r) +#pragma omp target map(r) {} - #pragma omp target map(marr[2][0:2][0:2]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[2] [0:2] [0:2]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[:][0:2][0:2]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:] [0:2] [0:2]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[2][3][0:2]) +#pragma omp target map(marr[2][3] [0:2]) {} - #pragma omp target map(marr[:][:][:]) +#pragma omp target map(marr[:][:][:]) {} - #pragma omp target map(marr[:2][:][:]) +#pragma omp target map(marr[:2][:][:]) {} - #pragma omp target map(marr[arg:][:][:]) +#pragma omp target map(marr [arg:][:][:]) {} - #pragma omp target map(marr[arg:]) +#pragma omp target 
map(marr [arg:]) {} - #pragma omp target map(marr[arg:][:arg][:]) // correct if arg is the size of dimension 2 +#pragma omp target map(marr [arg:][:arg][:]) // correct if arg is the size of dimension 2 {} - #pragma omp target map(marr[:arg][:]) +#pragma omp target map(marr[:arg][:]) {} - #pragma omp target map(marr[:arg][n:]) +#pragma omp target map(marr[:arg] [n:]) {} - #pragma omp target map(marr[:][:arg][n:]) // correct if arg is the size of dimension 2 +#pragma omp target map(marr[:][:arg] [n:]) // correct if arg is the size of dimension 2 {} - #pragma omp target map(marr[:][:m][n:]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:][:m] [n:]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[n:m][:arg][n:]) +#pragma omp target map(marr [n:m][:arg] [n:]) {} - #pragma omp target map(marr[:2][:1][:]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:2][:1][:]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[:2][1:][:]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:2] [1:][:]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[:2][:][:1]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:2][:][:1]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[:2][:][1:]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:2][:] [1:]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[:1][:2][:]) +#pragma omp target map(marr[:1][:2][:]) {} - #pragma omp target map(marr[:1][0][:]) +#pragma omp target map(marr[:1][0][:]) {} - #pragma omp target map(marr[:arg][:2][:]) // correct if arg is 1 +#pragma omp target map(marr[:arg][:2][:]) // correct if arg is 1 {} - #pragma omp target map(marr[:1][3:1][:2]) +#pragma omp target map(marr[:1] [3:1][:2]) {} - #pragma omp target map(marr[:1][3:arg][:2]) // correct if arg is 1 +#pragma omp target map(marr[:1] [3:arg][:2]) // correct if arg is 1 {} - #pragma omp target map(marr[:1][3:2][:2]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:1] [3:2][:2]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr[:2][:10][:]) +#pragma omp target map(marr[:2][:10][:]) {} - #pragma omp target map(marr[:2][:][:5+5]) +#pragma omp target map(marr[:2][:][:5 + 5]) {} - #pragma omp target map(marr[:2][2+2-4:][0:5+5]) +#pragma omp target map(marr[:2] [2 + 2 - 4:] [0:5 + 5]) {} - #pragma omp target map(marr[:1][:2][0]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(marr[:1][:2][0]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(marr2[:1][:2][0]) +#pragma omp target map(marr2[:1][:2][0]) {} - #pragma omp target map(mvla[:1][:][0]) // correct if the size of dimension 2 is 1. +#pragma omp target map(mvla[:1][:][0]) // correct if the size of dimension 2 is 1. {} - #pragma omp target map(mvla[:2][:arg][:]) // correct if arg is the size of dimension 2. +#pragma omp target map(mvla[:2][:arg][:]) // correct if arg is the size of dimension 2. 
{} - #pragma omp target map(mvla[:1][:2][0]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(mvla[:1][:2][0]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(mvla[1][2:arg][:]) +#pragma omp target map(mvla[1] [2:arg][:]) {} - #pragma omp target map(mvla[:1][:][:]) +#pragma omp target map(mvla[:1][:][:]) {} - #pragma omp target map(mvla2[:1][:2][:11]) +#pragma omp target map(mvla2[:1][:2][:11]) {} - #pragma omp target map(mvla2[:1][:2][:10]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(mvla2[:1][:2][:10]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(mptr[:2][2+2-4:1][0:5+5]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(mptr[:2] [2 + 2 - 4:1] [0:5 + 5]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(mptr[:1][:2-1][2:4-3]) +#pragma omp target map(mptr[:1][:2 - 1] [2:4 - 3]) {} - #pragma omp target map(mptr[:1][:arg][2:4-3]) // correct if arg is 1. +#pragma omp target map(mptr[:1][:arg] [2:4 - 3]) // correct if arg is 1. {} - #pragma omp target map(mptr[:1][:2-1][0:2]) +#pragma omp target map(mptr[:1][:2 - 1] [0:2]) {} - #pragma omp target map(mptr[:1][:2][0:2]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(mptr[:1][:2] [0:2]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(mptr[:1][:][0:2]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}} +#pragma omp target map(mptr[:1][:] [0:2]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}} {} - #pragma omp target map(mptr[:2][:1][0:2]) // expected-error {{array section does not specify contiguous storage}} +#pragma omp target map(mptr[:2][:1] [0:2]) // expected-error {{array section does not specify contiguous storage}} {} - #pragma omp target map(r.ArrS[0].B) +#pragma omp target map(r.ArrS[0].B) {} - #pragma omp target map(r.ArrS[:1].B) // expected-error {{OpenMP array section is not allowed here}} +#pragma omp target map(r.ArrS[:1].B) // expected-error {{OpenMP array section is not allowed here}} {} - #pragma omp target map(r.ArrS[:arg].B) // expected-error {{OpenMP array section is not allowed here}} +#pragma omp target map(r.ArrS[:arg].B) // expected-error {{OpenMP array section is not allowed here}} {} - #pragma omp target map(r.ArrS[0].Arr[1:23]) +#pragma omp target map(r.ArrS[0].Arr [1:23]) {} - #pragma omp target map(r.ArrS[0].Arr[1:arg]) +#pragma omp target map(r.ArrS[0].Arr [1:arg]) {} - #pragma omp target map(r.ArrS[0].Arr[arg:23]) +#pragma omp target map(r.ArrS[0].Arr [arg:23]) {} - #pragma omp target map(r.ArrS[0].Error) // expected-error {{no member named 'Error' in 'SB'}} +#pragma omp target map(r.ArrS[0].Error) // expected-error {{no member named 'Error' in 'SB'}} {} - #pragma omp target map(r.ArrS[0].A, r.ArrS[1].A) // expected-error {{multiple array elements associated with the same variable are not allowed in map clauses of the same construct}} expected-note {{used here}} +#pragma omp target map(r.ArrS[0].A, r.ArrS[1].A) // expected-error {{multiple array elements associated with the same variable are not allowed in map clauses of the same construct}} expected-note {{used here}} {} - #pragma omp 
target map(r.ArrS[0].A, t.ArrS[1].A) +#pragma omp target map(r.ArrS[0].A, t.ArrS[1].A) {} - #pragma omp target map(r.PtrS[0], r.PtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}} +#pragma omp target map(r.PtrS[0], r.PtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}} {} - #pragma omp target map(r.PtrS, r.PtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}} +#pragma omp target map(r.PtrS, r.PtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}} {} - #pragma omp target map(r.PtrS->A, r.PtrS->B) +#pragma omp target map(r.PtrS->A, r.PtrS->B) {} - #pragma omp target map(r.RPtrS[0], r.RPtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}} +#pragma omp target map(r.RPtrS[0], r.RPtrS->B) // expected-error {{same pointer dereferenced in multiple different ways in map clause expressions}} expected-note {{used here}} {} - #pragma omp target map(r.RPtrS, r.RPtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}} +#pragma omp target map(r.RPtrS, r.RPtrS->B) // expected-error {{pointer cannot be mapped along with a section derived from itself}} expected-note {{used here}} {} - #pragma omp target map(r.RPtrS->A, r.RPtrS->B) +#pragma omp target map(r.RPtrS->A, r.RPtrS->B) {} - #pragma omp target map(r.S.Arr[:12]) +#pragma omp target map(r.S.Arr[:12]) {} - #pragma omp target map(r.S.foo()[:12]) // le45-error {{expected expression containing only member accesses and/or array sections based on named variables}} le50-error {{expected addressable lvalue in 'map' clause}} +#pragma omp target map(r.S.foo() [:12]) // le45-error {{expected expression containing only member accesses and/or array sections based on named variables}} le50-error {{expected addressable lvalue in 'map' clause}} {} - #pragma omp target map(r.C, r.D) +#pragma omp target map(r.C, r.D) {} - #pragma omp target map(r.C, r.C) // expected-error {{variable already marked as mapped in current construct}} expected-note {{used here}} +#pragma omp target map(r.C, r.C) // expected-error {{variable already marked as mapped in current construct}} expected-note {{used here}} {} - #pragma omp target map(r.C) map(r.C) // expected-error {{variable already marked as mapped in current construct}} expected-note {{used here}} +#pragma omp target map(r.C) map(r.C) // expected-error {{variable already marked as mapped in current construct}} expected-note {{used here}} {} - #pragma omp target map(r.C, r.S) // this would be an error only caught at runtime - Sema would have to make sure there is not way for the missing data between fields to be mapped somewhere else. +#pragma omp target map(r.C, r.S) // this would be an error only caught at runtime - Sema would have to make sure there is no way for the missing data between fields to be mapped somewhere else.
{} - #pragma omp target map(r, r.S) // expected-error {{variable already marked as mapped in current construct}} expected-note {{used here}} +#pragma omp target map(r, r.S) // expected-error {{variable already marked as mapped in current construct}} expected-note {{used here}} {} - #pragma omp target map(r.C, t.C) +#pragma omp target map(r.C, t.C) {} - #pragma omp target map(r.A) // expected-error {{bit fields cannot be used to specify storage in a 'map' clause}} +#pragma omp target map(r.A) // expected-error {{bit fields cannot be used to specify storage in a 'map' clause}} {} - #pragma omp target map(r.Arr) +#pragma omp target map(r.Arr) {} - #pragma omp target map(r.Arr[3:5]) +#pragma omp target map(r.Arr [3:5]) {} - #pragma omp target map(r.Ptr[3:5]) +#pragma omp target map(r.Ptr [3:5]) {} - #pragma omp target map(r.ArrS[3:5].A) // expected-error {{OpenMP array section is not allowed here}} +#pragma omp target map(r.ArrS [3:5].A) // expected-error {{OpenMP array section is not allowed here}} {} - #pragma omp target map(r.ArrS[3:5].Arr[6:7]) // expected-error {{OpenMP array section is not allowed here}} +#pragma omp target map(r.ArrS [3:5].Arr [6:7]) // expected-error {{OpenMP array section is not allowed here}} {} - #pragma omp target map(r.ArrS[3].Arr[6:7]) +#pragma omp target map(r.ArrS[3].Arr [6:7]) {} - #pragma omp target map(r.S.Arr[4:5]) +#pragma omp target map(r.S.Arr [4:5]) {} - #pragma omp target map(r.S.Ptr[4:5]) +#pragma omp target map(r.S.Ptr [4:5]) {} - #pragma omp target map(r.S.Ptr[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}} +#pragma omp target map(r.S.Ptr[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}} {} - #pragma omp target map((p+1)->A) // le45-error {{expected expression containing only member accesses and/or array sections based on named variables}} +#pragma omp target map((p + 1)->A) // le45-error {{expected expression containing only member accesses and/or array sections based on named variables}} {} - #pragma omp target map(u.B) // expected-error {{mapping of union members is not allowed}} +#pragma omp target map(u.B) // expected-error {{mapping of union members is not allowed}} {} - #pragma omp target +#pragma omp target { u.B = 0; r.S.foo(); } - #pragma omp target data map(to: r.C) //expected-note {{used here}} +#pragma omp target data map(to \ + : r.C) //expected-note {{used here}} { - #pragma omp target map(r.D) // expected-error {{original storage of expression in data environment is shared but data environment do not fully contain mapped expression storage}} +#pragma omp target map(r.D) // expected-error {{original storage of expression in data environment is shared but data environment do not fully contain mapped expression storage}} {} } - #pragma omp target data map(to: t.Ptr) //expected-note {{used here}} +#pragma omp target data map(to \ + : t.Ptr) //expected-note {{used here}} { - #pragma omp target map(t.Ptr[:23]) // expected-error {{pointer cannot be mapped along with a section derived from itself}} +#pragma omp target map(t.Ptr[:23]) // expected-error {{pointer cannot be mapped along with a section derived from itself}} {} } - #pragma omp target data map(to: t.C, t.D) +#pragma omp target data map(to \ + : t.C, t.D) { - #pragma omp target data map(to: t.C) +#pragma omp target data map(to \ + : t.C) { - #pragma omp target map(t.D) +#pragma omp target map(t.D) {} } } - #pragma omp target data 
map(marr[:][:][:]) +#pragma omp target data map(marr[:][:][:]) { - #pragma omp target data map(marr) +#pragma omp target data map(marr) {} } - #pragma omp target data map(to: t) +#pragma omp target data map(to \ + : t) { - #pragma omp target data map(to: t.C) +#pragma omp target data map(to \ + : t.C) { - #pragma omp target map(t.D) +#pragma omp target map(t.D) {} } } diff --git a/clang/test/OpenMP/target_update_ast_print.cpp b/clang/test/OpenMP/target_update_ast_print.cpp index e60e081b3210..fb6440b87cea 100644 --- a/clang/test/OpenMP/target_update_ast_print.cpp +++ b/clang/test/OpenMP/target_update_ast_print.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -14,29 +14,29 @@ void foo() {} template <typename T, typename U> T foo(T targ, U uarg) { - static T a; + static T a, *p; U b; int l; -#pragma omp target update to(a) if(l>5) device(l) nowait depend(inout:l) +#pragma omp target update to(([a][targ])p, a) if(l>5) device(l) nowait depend(inout:l) -#pragma omp target update from(b) if(l<5) device(l-1) nowait depend(inout:l) +#pragma omp target update from(b, ([a][targ])p) if(l<5) device(l-1) nowait depend(inout:l) return a + targ + (T)b; } -// CHECK: static T a; +// CHECK: static T a, *p; // CHECK-NEXT: U b; // CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(a) if(l > 5) device(l) nowait depend(inout : l){{$}} -// CHECK-NEXT: #pragma omp target update from(b) if(l < 5) device(l - 1) nowait depend(inout : l) -// CHECK: static int a; +// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l){{$}} +// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) +// CHECK: static int a, *p; // CHECK-NEXT: float b; // CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(a) if(l > 5) device(l) nowait depend(inout : l) -// CHECK-NEXT: #pragma omp target update from(b) if(l < 5) device(l - 1) nowait depend(inout : l) -// CHECK: static char a; +// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l) +// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) +// CHECK: static char a, *p; // CHECK-NEXT: float b; // CHECK-NEXT: int l; -// CHECK-NEXT: #pragma omp target update to(a) if(l > 5) device(l) nowait depend(inout : l) -// CHECK-NEXT: #pragma omp target update from(b) if(l < 5) device(l - 1) nowait
depend(inout : l) +// CHECK-NEXT: #pragma omp target update to(([a][targ])p,a) if(l > 5) device(l) nowait depend(inout : l) +// CHECK-NEXT: #pragma omp target update from(b,([a][targ])p) if(l < 5) device(l - 1) nowait depend(inout : l) int main(int argc, char **argv) { static int a; diff --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp index 479461e7ca80..fd5a62a8067c 100644 --- a/clang/test/OpenMP/target_update_codegen.cpp +++ b/clang/test/OpenMP/target_update_codegen.cpp @@ -984,5 +984,80 @@ void lvalue_find_base(float **f, SSA *sa) { #pragma omp target update from(*(sa->sa->i+*(1+sa->i+f))) } +#endif +///==========================================================================/// +// RUN: %clang_cc1 -DCK18 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-64 +// RUN: %clang_cc1 -DCK18 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-64 +// RUN: %clang_cc1 -DCK18 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-32 +// RUN: %clang_cc1 -DCK18 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK18 --check-prefix CK18-32 + +// RUN: %clang_cc1 -DCK18 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY18 %s +// RUN: %clang_cc1 -DCK18 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY18 %s +// RUN: %clang_cc1 -DCK18 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY18 %s +// RUN: %clang_cc1 -DCK18 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY18 %s +// SIMD-ONLY18-NOT: {{__kmpc|__tgt}} +#ifdef CK18 + +// CK18-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [1 x i64] [i64 33] +// CK18-DAG: [[MTYPE_FROM:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +//CK18-LABEL: array_shaping +void array_shaping(float *f, int sa) { + + // CK18-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], 
i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_TO]]{{.+}}) + // CK18-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK18-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK18-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + + // CK18-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK18-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK18-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + + // CK18-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float** + // CK18-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** + + // CK18-DAG: store float* [[F1:%.+]], float** [[BPC0]], + // CK18-DAG: store float* [[F2:%.+]], float** [[PC0]], + // CK18-DAG: store i64 [[SIZE:%.+]], i64* [[S0]], + + // CK18-DAG: [[F1]] = load float*, float** [[F_ADDR:%.+]], + // CK18-DAG: [[F2]] = load float*, float** [[F_ADDR]], + // CK18-64-DAG: [[SIZE]] = mul nuw i64 [[SZ1:%.+]], 4 + // CK18-64-DAG: [[SZ1]] = mul nuw i64 12, %{{.+}} + // CK18-32-DAG: [[SIZE]] = sext i32 [[SZ1:%.+]] to i64 + // CK18-32-DAG: [[SZ1]] = mul nuw i32 [[SZ2:%.+]], 4 + // CK18-32-DAG: [[SZ2]] = mul nuw i32 12, %{{.+}} + #pragma omp target update to(([3][sa][4])f) + sa = 1; + // CK18-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_FROM]]{{.+}}) + // CK18-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] + // CK18-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] + // CK18-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + + // CK18-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK18-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK18-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + + // CK18-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float** + // CK18-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** + + // CK18-DAG: store float* [[F1:%.+]], float** [[BPC0]], + // CK18-DAG: store float* [[F2:%.+]], float** [[PC0]], + // CK18-DAG: store i64 [[SIZE:%.+]], i64* [[S0]], + + // CK18-DAG: [[F1]] = load float*, float** [[F_ADDR:%.+]], + // CK18-DAG: [[F2]] = load float*, float** [[F_ADDR]], + // CK18-64-DAG: [[SIZE]] = mul nuw i64 [[SZ1:%.+]], 5 + // CK18-64-DAG: [[SZ1]] = mul nuw i64 4, %{{.+}} + // CK18-32-DAG: [[SIZE]] = sext i32 [[SZ1:%.+]] to i64 + // CK18-32-DAG: [[SZ1]] = mul nuw i32 [[SZ2:%.+]], 5 + // CK18-32-DAG: [[SZ2]] = mul nuw i32 4, %{{.+}} + #pragma omp target update from(([sa][5])f) +} + #endif #endif diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 0f11b390f7fa..1da6c5045934 100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -164,8 +164,8 @@ int main(int argc, char **argv) { #pragma omp threadprivate(a) Enum ee; // CHECK: Enum ee; -#pragma omp task untied mergeable depend(out:argv[:a][1], (arr)[0:],([argc][10])argv) if(task: argc > 0) priority(f) depend(depobj:y) - // CHECK-NEXT: #pragma omp task untied mergeable depend(out : argv[:a][1],(arr)[0:],([argc][10])argv) if(task: argc > 0) priority(f) depend(depobj : y) +#pragma omp task untied mergeable depend(out:argv[:a][1], (arr)[0:],([argc][10])argv,b) if(task: argc > 0) priority(f) depend(depobj:y) + // CHECK-NEXT: #pragma omp task untied mergeable depend(out : 
argv[:a][1],(arr)[0:],([argc][10])argv,b) if(task: argc > 0) priority(f) depend(depobj : y) a = 2; // CHECK-NEXT: a = 2; #pragma omp taskgroup task_reduction(min: arr1) diff --git a/clang/test/OpenMP/task_codegen.c b/clang/test/OpenMP/task_codegen.c index 9e4b3b59d6d5..0f01f11be8b3 100644 --- a/clang/test/OpenMP/task_codegen.c +++ b/clang/test/OpenMP/task_codegen.c @@ -58,7 +58,7 @@ int main() { // CHECK: store i8 1, i8* [[FLAGS_ADDR]], // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], // CHECK: [[A_CAST:%.+]] = sext i32 [[A]] to i64 - // CHECK: [[SZ1:%.+]] = mul nuw i64 3, [[A_CAST]] + // CHECK: [[SZ1:%.+]] = mul nuw i64 24, [[A_CAST]] // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], // CHECK: [[A_CAST:%.+]] = sext i32 [[A]] to i64 // CHECK: [[SZ:%.+]] = mul nuw i64 [[SZ1]], [[A_CAST]] diff --git a/clang/test/OpenMP/task_depend_messages.cpp b/clang/test/OpenMP/task_depend_messages.cpp index 7d976eca2ec1..f04c167cbdcc 100644 --- a/clang/test/OpenMP/task_depend_messages.cpp +++ b/clang/test/OpenMP/task_depend_messages.cpp @@ -67,8 +67,8 @@ int main(int argc, char **argv, char *env[]) { #pragma omp task depend(in : ([]) // omp45-error {{expected body of lambda expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} omp50-error 2 {{expected expression}} #pragma omp task depend(in : ([])a // omp45-error {{expected body of lambda expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} omp50-error {{expected expression}} #pragma omp task depend(in : ([])a) // omp45-error {{expected body of lambda expression}} omp50-error {{expected expression}} - #pragma omp task depend(in : ([a])a) // omp45-error {{expected body of lambda expression}} omp50-error {{expected pointer type expression as a base of an array shaping operation}} - #pragma omp task depend(in : ([a])argc) // omp45-error {{expected body of lambda expression}} omp50-error {{expected pointer type expression as a base of an array shaping operation}} + #pragma omp task depend(in : ([a])a) // omp45-error {{expected body of lambda expression}} omp50-error {{expected expression with a pointer to a complete type as a base of an array shaping operation}} + #pragma omp task depend(in : ([a])argc) // omp45-error {{expected body of lambda expression}} omp50-error {{expected expression with a pointer to a complete type as a base of an array shaping operation}} #pragma omp task depend(in : ([-1][0])argv) // omp45-error {{expected variable name or 'this' in lambda capture list}} omp45-error {{expected ')'}} omp45-note {{to match this '('}} omp50-error {{array shaping dimension is evaluated to a non-positive value -1}} omp50-error {{array shaping dimension is evaluated to a non-positive value 0}} foo(); diff --git a/clang/test/Sema/parentheses.c b/clang/test/Sema/parentheses.c index 047bcbfe6caf..164fe4c0f12d 100644 --- a/clang/test/Sema/parentheses.c +++ b/clang/test/Sema/parentheses.c @@ -117,5 +117,5 @@ void conditional_op(int x, int y, _Bool b, void* p) { (void)(x && b ? 
1 : 2); // no warning, logical operator } -// RUN: not %clang_cc1 -fsyntax-only -Wparentheses -Werror -fdiagnostics-show-option %s 2>&1 | FileCheck %s -check-prefix=CHECK-FLAG +// RUN: not %clang_cc1 -fsyntax-only -Wparentheses -Werror %s 2>&1 | FileCheck %s -check-prefix=CHECK-FLAG // CHECK-FLAG: error: using the result of an assignment as a condition without parentheses [-Werror,-Wparentheses] diff --git a/clang/test/SemaCXX/atomic-type.cpp b/clang/test/SemaCXX/atomic-type.cpp index 1ed321e47b9a..d7d8bbba50ca 100644 --- a/clang/test/SemaCXX/atomic-type.cpp +++ b/clang/test/SemaCXX/atomic-type.cpp @@ -103,3 +103,11 @@ namespace copy_init { bool PR21836(_Atomic(int) *x) { // expected-warning {{'_Atomic' is a C11 extension}} return *x; } + +namespace non_trivially_copyable { + struct S { + ~S() {} + }; + _Atomic S s; // expected-error {{_Atomic cannot be applied to type 'non_trivially_copyable::S' which is not trivially copyable}} \ + // expected-warning {{'_Atomic' is a C11 extension}} +} diff --git a/clang/unittests/Analysis/CloneDetectionTest.cpp b/clang/unittests/Analysis/CloneDetectionTest.cpp index e09d0733f044..f8f3602f5a2a 100644 --- a/clang/unittests/Analysis/CloneDetectionTest.cpp +++ b/clang/unittests/Analysis/CloneDetectionTest.cpp @@ -42,7 +42,7 @@ class NoBarFunctionConstraint { for (const StmtSequence &Arg : {A, B}) { if (const auto *D = dyn_cast<FunctionDecl>(Arg.getContainingDecl())) { - if (StringRef(D->getNameAsString()).startswith("bar")) + if (D->getName().startswith("bar")) return false; } } diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index 498c05900bf2..5ff1e9cd8070 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -348,20 +348,29 @@ void llvm_gcda_start_file(const char *orig_filename, const char version[4], fd = open(filename, O_RDWR | O_BINARY); if (fd == -1) { - /* Try opening the file, creating it if necessary. */ - new_file = 1; - mode = "w+b"; - fd = open(filename, O_RDWR | O_CREAT | O_BINARY, 0644); - if (fd == -1) { + /* Try creating the file. */ + fd = open(filename, O_RDWR | O_CREAT | O_EXCL | O_BINARY, 0644); + if (fd != -1) { + new_file = 1; + mode = "w+b"; + } else { /* Try creating the directories first then opening the file. */ __llvm_profile_recursive_mkdir(filename); - fd = open(filename, O_RDWR | O_CREAT | O_BINARY, 0644); - if (fd == -1) { - /* Bah! It's hopeless. */ - int errnum = errno; - fprintf(stderr, "profiling: %s: cannot open: %s\n", filename, - strerror(errnum)); - return; + fd = open(filename, O_RDWR | O_CREAT | O_EXCL | O_BINARY, 0644); + if (fd != -1) { + new_file = 1; + mode = "w+b"; + } else { + /* Another process may have created the file just now. + * Try opening it without O_CREAT and O_EXCL. */ + fd = open(filename, O_RDWR | O_BINARY); + if (fd == -1) { + /* Bah! It's hopeless.
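All attempts to create or open the .gcda file have failed, so report the error and give up on emitting coverage data for this file.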
*/ + int errnum = errno; + fprintf(stderr, "profiling: %s: cannot open: %s\n", filename, + strerror(errnum)); + return; + } } } } diff --git a/compiler-rt/lib/tsan/go/build.bat b/compiler-rt/lib/tsan/go/build.bat index bf502873b113..0755688e5bd3 100644 --- a/compiler-rt/lib/tsan/go/build.bat +++ b/compiler-rt/lib/tsan/go/build.bat @@ -59,4 +59,4 @@ gcc ^ -DSANITIZER_DEBUG=0 ^ -O3 ^ -fomit-frame-pointer ^ - -std=c++11 + -std=c++14 diff --git a/compiler-rt/test/profile/Inputs/instrprof-gcov-parallel.driver.c b/compiler-rt/test/profile/Inputs/instrprof-gcov-parallel.driver.c new file mode 100644 index 000000000000..6ce12d35772f --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-gcov-parallel.driver.c @@ -0,0 +1,36 @@ +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#define CHILDREN 7 + +int main(int argc, char *argv[]) { + _Atomic int *sync = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (sync == MAP_FAILED) + return 1; + *sync = 0; + + for (int i = 0; i < CHILDREN; i++) { + pid_t pid = fork(); + if (!pid) { + // child + while (*sync == 0) + ; // wait for the parent in order to call execl simultaneously + execl(argv[1], argv[1], NULL); + } else if (pid == -1) { + *sync = 1; // release all children + return 1; + } + } + + // parent + *sync = 1; // start the program in all children simultaneously + for (int i = 0; i < CHILDREN; i++) + wait(NULL); + + return 0; +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-gcov-parallel.target.c b/compiler-rt/test/profile/Inputs/instrprof-gcov-parallel.target.c new file mode 100644 index 000000000000..ae6e60fb2190 --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-gcov-parallel.target.c @@ -0,0 +1,9 @@ +#define COUNT 101 + +static volatile int aaa; + +int main(int argc, char *argv[]) { + for (int i = 0; i < COUNT; i++) + aaa++; + return 0; +} diff --git a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test new file mode 100644 index 000000000000..0c7198e3c4e9 --- /dev/null +++ b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test @@ -0,0 +1,16 @@ +RUN: mkdir -p %t.d +RUN: cd %t.d + +RUN: %clang -o %t.driver %S/../Inputs/instrprof-gcov-parallel.driver.c +RUN: %clang --coverage -o %t.target %S/../Inputs/instrprof-gcov-parallel.target.c +RUN: test -f instrprof-gcov-parallel.target.gcno + +RUN: rm -f instrprof-gcov-parallel.target.gcda +RUN: %run %t.driver %t.target +RUN: llvm-cov gcov instrprof-gcov-parallel.target.gcda +RUN: FileCheck --input-file instrprof-gcov-parallel.target.c.gcov %s + +# Test if the .gcda file is correctly created from one of the child processes +# and the counters of all processes are recorded correctly.
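+# The driver mmaps a shared flag that all forked children spin on; flipping it +# releases the children so they exec the instrumented target at the same time +# and race to create and update a single .gcda file.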
+# 707 = CHILDREN * COUNT +CHECK: 707: {{[0-9]+}}: aaa++; diff --git a/libc/src/signal/linux/CMakeLists.txt b/libc/src/signal/linux/CMakeLists.txt index a3017e406ee1..7992703a42d2 100644 --- a/libc/src/signal/linux/CMakeLists.txt +++ b/libc/src/signal/linux/CMakeLists.txt @@ -94,3 +94,29 @@ add_entrypoint_object( sigaction signal_h ) + +add_entrypoint_object( + sigfillset + SRCS + sigfillset.cpp + HDRS + signal.h + ../sigfillset.h + DEPENDS + __errno_location + errno_h + signal_h +) + +add_entrypoint_object( + sigdelset + SRCS + sigdelset.cpp + HDRS + signal.h + ../sigdelset.h + DEPENDS + __errno_location + errno_h + signal_h +) diff --git a/libc/src/signal/linux/sigdelset.cpp b/libc/src/signal/linux/sigdelset.cpp new file mode 100644 index 000000000000..b04ec56cc33e --- /dev/null +++ b/libc/src/signal/linux/sigdelset.cpp @@ -0,0 +1,28 @@ +//===----------------- Linux implementation of sigdelset ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/signal/sigdelset.h" +#include "include/errno.h" +#include "src/errno/llvmlibc_errno.h" +#include "src/signal/linux/signal.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +int LLVM_LIBC_ENTRYPOINT(sigdelset)(sigset_t *set, int signum) { + if (!set || (unsigned)(signum - 1) >= (8 * sizeof(sigset_t))) { + llvmlibc_errno = EINVAL; + return -1; + } + auto *sigset = reinterpret_cast<__llvm_libc::Sigset *>(set); + sigset->delset(signum); + return 0; +} + +} // namespace __llvm_libc diff --git a/libc/src/signal/linux/sigfillset.cpp b/libc/src/signal/linux/sigfillset.cpp new file mode 100644 index 000000000000..6c10da334993 --- /dev/null +++ b/libc/src/signal/linux/sigfillset.cpp @@ -0,0 +1,28 @@ +//===----------------- Linux implementation of sigfillset -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/signal/sigfillset.h" +#include "include/errno.h" +#include "src/errno/llvmlibc_errno.h" +#include "src/signal/linux/signal.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +int LLVM_LIBC_ENTRYPOINT(sigfillset)(sigset_t *set) { + if (!set) { + llvmlibc_errno = EINVAL; + return -1; + } + auto *sigset = reinterpret_cast<__llvm_libc::Sigset *>(set); + *sigset = __llvm_libc::Sigset::fullset(); + return 0; +} + +} // namespace __llvm_libc diff --git a/libc/src/signal/linux/signal.h b/libc/src/signal/linux/signal.h index 93b33596580f..9a02ea7d847d 100644 --- a/libc/src/signal/linux/signal.h +++ b/libc/src/signal/linux/signal.h @@ -26,9 +26,9 @@ struct Sigset { constexpr static Sigset fullset() { return {-1UL}; } constexpr static Sigset emptySet() { return {0}; } - constexpr void addset(int signal) { - nativeSigset |= (1L << (signal - 1)); - } + constexpr void addset(int signal) { nativeSigset |= (1L << (signal - 1)); } + + constexpr void delset(int signal) { nativeSigset &= ~(1L << (signal - 1)); } operator sigset_t() const { return nativeSigset; } }; @@ -39,16 +39,15 @@ static inline int block_all_signals(Sigset &set) { sigset_t nativeSigset = all; sigset_t oldSet = set; int ret = __llvm_libc::syscall(SYS_rt_sigprocmask, SIG_BLOCK, &nativeSigset, - &oldSet, sizeof(sigset_t)); + &oldSet, sizeof(sigset_t)); set = {oldSet}; return ret; } static inline int restore_signals(const Sigset &set) { sigset_t nativeSigset = set; - return __llvm_libc::syscall(SYS_rt_sigprocmask, SIG_SETMASK, - &nativeSigset, nullptr, - sizeof(sigset_t)); + return __llvm_libc::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &nativeSigset, + nullptr, sizeof(sigset_t)); } } // namespace __llvm_libc diff --git a/libc/src/signal/sigdelset.h b/libc/src/signal/sigdelset.h new file mode 100644 index 000000000000..05cc47ce4b9c --- /dev/null +++ b/libc/src/signal/sigdelset.h @@ -0,0 +1,20 @@ +//===------------- Implementation header for sigdelset ---------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SIGNAL_SIGDELSET_H +#define LLVM_LIBC_SRC_SIGNAL_SIGDELSET_H + +#include "include/signal.h" + +namespace __llvm_libc { + +int sigdelset(sigset_t *set, int signum); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SIGNAL_SIGDELSET_H diff --git a/libc/src/signal/sigfillset.h b/libc/src/signal/sigfillset.h new file mode 100644 index 000000000000..facf67919a9f --- /dev/null +++ b/libc/src/signal/sigfillset.h @@ -0,0 +1,20 @@ +//===------------- Implementation header for sigfillset --------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SIGNAL_SIGFILLSET_H +#define LLVM_LIBC_SRC_SIGNAL_SIGFILLSET_H + +#include "include/signal.h" + +namespace __llvm_libc { + +int sigfillset(sigset_t *set); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SIGNAL_SIGFILLSET_H diff --git a/libc/test/src/signal/CMakeLists.txt b/libc/test/src/signal/CMakeLists.txt index e198e416c8e6..67219c85227a 100644 --- a/libc/test/src/signal/CMakeLists.txt +++ b/libc/test/src/signal/CMakeLists.txt @@ -66,3 +66,34 @@ add_libc_unittest( __errno_location errno_h ) + +add_libc_unittest( + sigfillset_test + SUITE + libc_signal_unittests + SRCS + sigfillset_test.cpp + DEPENDS + sigfillset + sigprocmask + signal_h + raise + errno_h + __errno_location +) + +add_libc_unittest( + sigdelset_test + SUITE + libc_signal_unittests + SRCS + sigdelset_test.cpp + DEPENDS + sigdelset + sigfillset + sigprocmask + signal_h + raise + errno_h + __errno_location +) diff --git a/libc/test/src/signal/sigdelset_test.cpp b/libc/test/src/signal/sigdelset_test.cpp new file mode 100644 index 000000000000..d6e259ca4ca0 --- /dev/null +++ b/libc/test/src/signal/sigdelset_test.cpp @@ -0,0 +1,36 @@ +//===--------------------- Unittests for sigdelset ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/errno.h" +#include "include/signal.h" +#include "src/signal/raise.h" +#include "src/signal/sigdelset.h" +#include "src/signal/sigfillset.h" +#include "src/signal/sigprocmask.h" + +#include "utils/UnitTest/ErrnoSetterMatcher.h" +#include "utils/UnitTest/Test.h" + +TEST(Sigdelset, Invalid) { + using __llvm_libc::testing::ErrnoSetterMatcher::Fails; + // Invalid set. + EXPECT_THAT(__llvm_libc::sigdelset(nullptr, SIGUSR1), Fails(EINVAL)); + + sigset_t set; + // Valid set, invalid signum. + EXPECT_THAT(__llvm_libc::sigdelset(&set, -1), Fails(EINVAL)); +} + +TEST(Sigdelset, UnblockOne) { + using __llvm_libc::testing::ErrnoSetterMatcher::Succeeds; + sigset_t set; + EXPECT_THAT(__llvm_libc::sigfillset(&set), Succeeds()); + EXPECT_THAT(__llvm_libc::sigdelset(&set, SIGUSR1), Succeeds()); + EXPECT_THAT(__llvm_libc::sigprocmask(SIG_SETMASK, &set, nullptr), Succeeds()); + EXPECT_DEATH([] { __llvm_libc::raise(SIGUSR1); }, SIGUSR1); +} diff --git a/libc/test/src/signal/sigfillset_test.cpp b/libc/test/src/signal/sigfillset_test.cpp new file mode 100644 index 000000000000..35e6721ab6c6 --- /dev/null +++ b/libc/test/src/signal/sigfillset_test.cpp @@ -0,0 +1,29 @@ +//===-------------------- Unittests for sigfillset ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/errno.h" +#include "include/signal.h" +#include "src/signal/raise.h" +#include "src/signal/sigfillset.h" +#include "src/signal/sigprocmask.h" + +#include "utils/UnitTest/ErrnoSetterMatcher.h" +#include "utils/UnitTest/Test.h" + +TEST(Sigfillset, Invalid) { + using __llvm_libc::testing::ErrnoSetterMatcher::Fails; + EXPECT_THAT(__llvm_libc::sigfillset(nullptr), Fails(EINVAL)); +} + +TEST(Sigfillset, BlocksAll) { + using __llvm_libc::testing::ErrnoSetterMatcher::Succeeds; + sigset_t set; + EXPECT_THAT(__llvm_libc::sigfillset(&set), Succeeds()); + EXPECT_THAT(__llvm_libc::sigprocmask(SIG_SETMASK, &set, nullptr), Succeeds()); + EXPECT_EXITS([] { __llvm_libc::raise(SIGUSR1); }, 0); +} diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 8fdf4a4939d1..15e9df2ea269 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -852,18 +852,6 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_integral_v // is_floating_point -#if __has_keyword(__is_floating_point) - -template <class _Tp> -struct _LIBCPP_TEMPLATE_VIS is_floating_point : _BoolConstant<__is_floating_point(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 -template <class _Tp> -_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_floating_point_v = __is_floating_point(_Tp); -#endif - -#else - template <class _Tp> struct __libcpp_is_floating_point : public false_type {}; template <> struct __libcpp_is_floating_point<float> : public true_type {}; template <> struct __libcpp_is_floating_point<double> : public true_type {}; @@ -878,8 +866,6 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_floating_point_v = is_floating_point<_Tp>::value; #endif -#endif // __has_keyword(__is_floating_point) - // is_array #if __has_keyword(__is_array) @@ -1192,17 +1178,6 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_enum_v // is_arithmetic -#if __has_keyword(__is_arithmetic) - -template <class _Tp> -struct _LIBCPP_TEMPLATE_VIS is_arithmetic : _BoolConstant<__is_arithmetic(_Tp)> { }; - -#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) -template <class _Tp> -_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_arithmetic_v = __is_arithmetic(_Tp); -#endif - -#else // __has_keyword(__is_arithmetic) template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_arithmetic : public integral_constant<bool, is_integral<_Tp>::value || @@ -1214,8 +1189,6 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_arithmetic_v = is_arithmetic<_Tp>::value; #endif -#endif // __has_keyword(__is_arithmetic) - // is_fundamental // In clang 9 and lower, this builtin did not work for nullptr_t.
Additionally, in C++03 mode, diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp index 728eec2938e1..03aff1681227 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp @@ -10,6 +10,8 @@ // int_type pbackfail(int_type c = traits::eof()); +// FILE_DEPENDENCIES: underflow.dat + #include <fstream> #include <cassert> diff --git a/libcxx/utils/libcxx/compiler.py b/libcxx/utils/libcxx/compiler.py index dd334cdbcff9..248b5ba8a98c 100644 --- a/libcxx/utils/libcxx/compiler.py +++ b/libcxx/utils/libcxx/compiler.py @@ -29,6 +29,7 @@ def __init__(self, config, path, flags=None, compile_flags=None, link_flags=None self.flags = list(flags or []) self.compile_flags = list(compile_flags or []) self.link_flags = list(link_flags or []) + self.link_libcxxabi_flag = '-lc++abi' self.warning_flags = list(warning_flags or []) self.verify_supported = verify_supported self.use_verify = use_verify diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 9c154bafa808..20d0a796a3af 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -822,6 +822,7 @@ def configure_link_flags_abi_library(self): if cxxabi_library_root: libname = self.make_static_lib_name('c++abi') abs_path = os.path.join(cxxabi_library_root, libname) + self.cxx.link_libcxxabi_flag = abs_path self.cxx.link_flags += [abs_path] else: self.cxx.link_flags += ['-lc++abi'] @@ -1025,6 +1026,7 @@ def configure_substitutions(self): sub.append(('%{flags}', ' '.join(map(pipes.quote, self.cxx.flags)))) sub.append(('%{compile_flags}', ' '.join(map(pipes.quote, self.cxx.compile_flags)))) sub.append(('%{link_flags}', ' '.join(map(pipes.quote, self.cxx.link_flags)))) + sub.append(('%{link_libcxxabi}', pipes.quote(self.cxx.link_libcxxabi_flag))) if self.cxx.isVerifySupported(): sub.append(('%{verify}', ' '.join(self.cxx.verify_flags))) # Add compile and build shortcuts @@ -1048,7 +1050,6 @@ def configure_substitutions(self): exec_args.append('--host {}'.format(self.executor.user_prefix + self.executor.host)) executor = os.path.join(self.libcxx_src_root, 'utils', 'ssh.py') else: - exec_args.append('--working_directory "%S"') executor = os.path.join(self.libcxx_src_root, 'utils', 'run.py') sub.append(('%{exec}', '{} {} {} -- '.format(pipes.quote(sys.executable), pipes.quote(executor), diff --git a/libcxx/utils/libcxx/test/executor.py b/libcxx/utils/libcxx/test/executor.py index b555b1f03df9..c34310cdd2e2 100644 --- a/libcxx/utils/libcxx/test/executor.py +++ b/libcxx/utils/libcxx/test/executor.py @@ -10,6 +10,7 @@ import os import posixpath import ntpath +import shutil from libcxx.test import tracing from libcxx.util import executeCommand @@ -61,6 +62,12 @@ def run(self, exe_path, cmd=None, work_dir='.', file_deps=None, env=None): if env: env = self.merge_environments(os.environ, env) + for dep in file_deps or []: + if os.path.isdir(dep): + shutil.copytree(dep, os.path.join(work_dir, os.path.basename(dep)), symlinks=True) + else: + shutil.copy2(dep, work_dir) + out, err, rc = executeCommand(cmd, cwd=work_dir, env=env) return (cmd, out, err, rc) diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index ff23273ab9a6..1bc85f24976a 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -9,6
+9,8 @@ import copy import errno import os +import shutil +import tempfile import time import random @@ -125,8 +127,8 @@ def _execute(self, test, lit_config): lit_config.fatal('Unsupported RUN line found in test %s' % name) tmpDir, tmpBase = lit.TestRunner.getTempPaths(test) - substitutions = lit.TestRunner.getDefaultSubstitutions(test, tmpDir, - tmpBase) + substitutions = lit.TestRunner.getDefaultSubstitutions( + test, tmpDir, tmpBase, normalize_slashes=self.execute_external) # Apply substitutions in FILE_DEPENDENCIES markup data_files = lit.TestRunner.applySubstitutions(test.file_dependencies, substitutions, @@ -209,16 +211,21 @@ def _evaluate_pass_test(self, test, tmpBase, lit_config, report += "Compilation failed unexpectedly!" return lit.Test.Result(lit.Test.FAIL, report) # Run the test - local_cwd = os.path.dirname(source_path) env = None if self.exec_env: env = self.exec_env max_retry = test.allowed_retries + 1 for retry_count in range(max_retry): - cmd, out, err, rc = self.executor.run(exec_path, [exec_path], - local_cwd, data_files, - env) + # Create a temporary directory just for that test and run the + # test in that directory + try: + execDirTmp = tempfile.mkdtemp(dir=execDir) + cmd, out, err, rc = self.executor.run(exec_path, [exec_path], + execDirTmp, data_files, + env) + finally: + shutil.rmtree(execDirTmp) report = "Compiled With: '%s'\n" % ' '.join(compile_cmd) report += libcxx.util.makeReport(cmd, out, err, rc) if rc == 0: diff --git a/libcxx/utils/run.py b/libcxx/utils/run.py index 6a89a2b9388a..7de82c78dbfa 100644 --- a/libcxx/utils/run.py +++ b/libcxx/utils/run.py @@ -14,14 +14,15 @@ import argparse import os +import shutil import subprocess import sys +import tempfile def main(): parser = argparse.ArgumentParser() parser.add_argument('--codesign_identity', type=str, required=False) - parser.add_argument('--working_directory', type=str, required=True) parser.add_argument('--dependencies', type=str, nargs='*', required=True) parser.add_argument('--env', type=str, nargs='*', required=True) (args, remaining) = parser.parse_known_args(sys.argv[1:]) @@ -42,14 +43,23 @@ def main(): # Extract environment variables into a dictionary env = {k : v for (k, v) in map(lambda s: s.split('=', 1), args.env)} - # Ensure the file dependencies exist - for file in args.dependencies: - if not os.path.exists(file): - sys.stderr.write('Missing file {} marked as a dependency of a test'.format(file)) - exit(1) + try: + tmpDir = tempfile.mkdtemp() - # Run the executable with the given environment in the given working directory - return subprocess.call(' '.join(remaining), cwd=args.working_directory, env=env, shell=True) + # Ensure the file dependencies exist and copy them to a temporary directory. + for dep in args.dependencies: + if not os.path.exists(dep): + sys.stderr.write('Missing file or directory "{}" marked as a dependency of a test'.format(dep)) + exit(1) + if os.path.isdir(dep): + shutil.copytree(dep, os.path.join(tmpDir, os.path.basename(dep)), symlinks=True) + else: + shutil.copy2(dep, tmpDir) + + # Run the executable with the given environment in the temporary directory. 
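+ # The dependencies were copied into tmpDir above, so the test can refer to them via relative paths; as before, the joined command is run through the shell so that compound command lines keep working.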
+ return subprocess.call(' '.join(remaining), cwd=tmpDir, env=env, shell=True) + finally: + shutil.rmtree(tmpDir) if __name__ == '__main__': exit(main()) diff --git a/libcxx/utils/ssh.py b/libcxx/utils/ssh.py index 20acaeb00e3d..f6b9574e39b2 100644 --- a/libcxx/utils/ssh.py +++ b/libcxx/utils/ssh.py @@ -15,8 +15,11 @@ import argparse import os +import posixpath import subprocess import sys +import tarfile +import tempfile def main(): @@ -29,62 +32,77 @@ def main(): if len(remaining) < 2: sys.stderr.write('Missing actual commands to run') - exit(1) - remaining = remaining[1:] # Skip the '--' + return 1 - # HACK: - # If the first argument is a file that ends in `.tmp.exe`, assume it is - # the name of an executable generated by a test file. This allows us to - # do custom processing like codesigning the executable and changing its - # path when running on the remote host. It's possible for there to be no - # such executable, for example in the case of a .sh.cpp test. - exe = None - if os.path.exists(remaining[0]) and remaining[0].endswith('.tmp.exe'): - exe = remaining.pop(0) - - # If there's an executable, do any necessary codesigning. - if exe and args.codesign_identity: - rc = subprocess.call(['xcrun', 'codesign', '-f', '-s', args.codesign_identity, exe], env={}) - if rc != 0: - sys.stderr.write('Failed to codesign: {}'.format(exe)) - return rc + commandLine = remaining[1:] # Skip the '--' ssh = lambda command: ['ssh', '-oBatchMode=yes', args.host, command] - scp = lambda src, dst: ['scp', '-oBatchMode=yes', '-r', src, '{}:{}'.format(args.host, dst)] - - # Create a temporary directory where the test will be run - tmp = subprocess.check_output(ssh('mktemp -d /tmp/libcxx.XXXXXXXXXX')).strip() - - # Ensure the test dependencies exist and scp them to the temporary directory. - # Test dependencies can be either files or directories, so the `scp` command - # needs to use `-r`. - for dep in args.dependencies: - if not os.path.exists(dep): - sys.stderr.write('Missing file or directory {} marked as a dependency of a test'.format(dep)) - exit(1) - subprocess.call(scp(dep, tmp)) - - # If there's an executable, change its path to be in the temporary directory. - # We know it has been copied to the remote host when we handled the test - # dependencies above. - if exe: - exe = os.path.join(tmp, os.path.basename(exe)) - - # If there's an executable, make sure it has 'execute' permissions on the - # remote host. The host that compiled the executable might not have a notion - # of 'executable' permissions. - if exe: - subprocess.call(ssh('chmod +x {}'.format(exe))) - - # Execute the command through SSH in the temporary directory, with the - # correct environment. - command = [exe] + remaining if exe else remaining - res = subprocess.call(ssh('cd {} && env -i {} {}'.format(tmp, ' '.join(args.env), ' '.join(command)))) - - # Remove the temporary directory when we're done. - subprocess.call(ssh('rm -r {}'.format(tmp))) - - return res + scp = lambda src, dst: ['scp', '-oBatchMode=yes', src, '{}:{}'.format(args.host, dst)] + + # Create a temporary directory where the test will be run. + tmp = subprocess.check_output(ssh('mktemp -d /tmp/libcxx.XXXXXXXXXX'), universal_newlines=True).strip() + + # HACK: + # If an argument is a file that ends in `.tmp.exe`, assume it is the name + # of an executable generated by a test file. We call these test-executables + # below. This allows us to do custom processing like codesigning test-executables + # and changing their path when running on the remote host. 
It's also possible + # for there to be no such executable, for example in the case of a .sh.cpp + # test. + isTestExe = lambda exe: exe.endswith('.tmp.exe') and os.path.exists(exe) + pathOnRemote = lambda file: posixpath.join(tmp, os.path.basename(file)) + + try: + # Do any necessary codesigning of test-executables found in the command line. + if args.codesign_identity: + for exe in filter(isTestExe, commandLine): + subprocess.check_call(['xcrun', 'codesign', '-f', '-s', args.codesign_identity, exe], env={}) + + # Ensure the test dependencies exist, tar them up and copy the tarball + # over to the remote host. + with tempfile.NamedTemporaryFile(suffix='.tar') as tmpTar: + with tarfile.open(fileobj=tmpTar, mode='w') as tarball: + for dep in args.dependencies: + if not os.path.exists(dep): + sys.stderr.write('Missing file or directory "{}" marked as a dependency of a test'.format(dep)) + return 1 + tarball.add(dep, arcname=os.path.basename(dep)) + + remoteTarball = pathOnRemote(tmpTar.name) + tmpTar.flush() + subprocess.check_call(scp(tmpTar.name, remoteTarball)) + + # Untar the dependencies in the temporary directory and remove the tarball. + remoteCommands = [ + 'tar -xf {} -C {}'.format(remoteTarball, tmp), + 'rm {}'.format(remoteTarball) + ] + + # Make sure all test-executables in the remote command line have 'execute' + # permissions on the remote host. The host that compiled the test-executable + # might not have a notion of 'executable' permissions. + for exe in map(pathOnRemote, filter(isTestExe, commandLine)): + remoteCommands.append('chmod +x {}'.format(exe)) + + # Execute the command through SSH in the temporary directory, with the + # correct environment. We tweak the command line to run it on the remote + # host by transforming the path of test-executables to their path in the + # temporary directory, where we know they have been copied when we handled + # test dependencies above. + remoteCommands += [ + 'cd {}'.format(tmp), + 'export {}'.format(' '.join(args.env)), + ' '.join(pathOnRemote(x) if isTestExe(x) else x for x in commandLine) + ] + + # Finally, SSH to the remote host and execute all the commands. + rc = subprocess.call(ssh(' && '.join(remoteCommands))) + return rc + + finally: + # Make sure the temporary directory is removed when we're done. + subprocess.check_call(ssh('rm -r {}'.format(tmp))) + if __name__ == '__main__': exit(main()) diff --git a/libcxxabi/test/incomplete_type.sh.cpp b/libcxxabi/test/incomplete_type.sh.cpp index 71147241b760..a762d2f9c60e 100644 --- a/libcxxabi/test/incomplete_type.sh.cpp +++ b/libcxxabi/test/incomplete_type.sh.cpp @@ -15,14 +15,15 @@ // UNSUPPORTED: libcxxabi-no-exceptions -// NOTE: Pass -lc++abi explicitly and before -lc++ so that -lc++ doesn't drag +// NOTE: Link libc++abi explicitly and before libc++ so that libc++ doesn't drag // in the system libc++abi installation on OS X. (DYLD_LIBRARY_PATH is ignored // for shell tests because of Apple security features). 
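+// FILE_DEPENDENCIES tells the %{exec} harness to copy the listed files into the test's temporary (or remote) working directory before running; here the dependency is the freshly linked executable itself.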
+// FILE_DEPENDENCIES: %t.exe
 // RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.one.o
 // RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.two.o -DTU_ONE
-// RUN: %{cxx} %{flags} %t.one.o %t.two.o -lc++abi %{link_flags} -o %t.exe
-// RUN: %t.exe
+// RUN: %{cxx} %{flags} %t.one.o %t.two.o %{link_libcxxabi} %{link_flags} -o %t.exe
+// RUN: %{exec} %t.exe

 #include
 #include
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index d2af7f50cc4c..fc212f539731 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -137,7 +137,7 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) {

 // When accessing a symbol defined in another translation unit, compilers
 // reserve a .toc entry, allocate a local label and generate toc-indirect
-// instuctions:
+// instructions:
 //
 //   addis 3, 2, .LC0@toc@ha  # R_PPC64_TOC16_HA
 //   ld    3, .LC0@toc@l(3)   # R_PPC64_TOC16_LO_DS, load the address from a .toc entry
diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp
index 6dad7c965f1a..9be27986693c 100644
--- a/lld/ELF/CallGraphSort.cpp
+++ b/lld/ELF/CallGraphSort.cpp
@@ -263,7 +263,7 @@ DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
 // Sort sections by the profile data provided by -callgraph-profile-file
 //
 // This first builds a call graph based on the profile data then merges sections
-// according to the C³ huristic. All clusters are then sorted by a density
+// according to the C³ heuristic. All clusters are then sorted by a density
 // metric to further improve locality.
 DenseMap<const InputSectionBase *, int> computeCallGraphProfileOrder() {
   return CallGraphSort().run();
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index b3031a3723c2..e93dec947d90 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -708,7 +708,7 @@ static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a,
     // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
     // microMIPS variants of these relocations use slightly different
     // expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi()
-    // to correctly handle less-sugnificant bit of the microMIPS symbol.
+    // to correctly handle the less-significant bit of the microMIPS symbol.
     uint64_t v = in.mipsGot->getGp(file) + a - p;
     if (type == R_MIPS_LO16 || type == R_MICROMIPS_LO16)
       v += 4;
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index f52dc701541f..4d507ee9c2c9 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -833,6 +833,7 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
   if (!(sec->flags & SHF_ALLOC))
     dot = 0;

+  bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr;
   ctx->memRegion = sec->memRegion;
   ctx->lmaRegion = sec->lmaRegion;
   if (ctx->memRegion)
@@ -851,19 +852,19 @@

   switchTo(sec);

-  ctx->lmaOffset = 0;
-
+  // ctx->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT() or
+  // AT>, recompute ctx->lmaOffset; otherwise, if both the previous and current
+  // LMA regions are the default, reuse the previous lmaOffset; otherwise, reset
+  // lmaOffset to 0.
This emulates heuristics described in
+  // https://sourceware.org/binutils/docs/ld/Output-Section-LMA.html
   if (sec->lmaExpr)
     ctx->lmaOffset = sec->lmaExpr().getValue() - dot;
-  if (MemoryRegion *mr = sec->lmaRegion)
+  else if (MemoryRegion *mr = sec->lmaRegion)
     ctx->lmaOffset = alignTo(mr->curPos, sec->alignment) - dot;
+  else if (!prevLMARegionIsDefault)
+    ctx->lmaOffset = 0;

-  // If neither AT nor AT> is specified for an allocatable section, the linker
-  // will set the LMA such that the difference between VMA and LMA for the
-  // section is the same as the preceding output section in the same region
-  // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
-  // This, however, should only be done by the first "non-header" section
-  // in the segment.
+  // Propagate ctx->lmaOffset to the first "non-header" section.
   if (PhdrEntry *l = ctx->outSec->ptLoad)
     if (sec == findFirstSection(l))
       l->lmaOffset = ctx->lmaOffset;
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 7453acfcf396..200ade76d306 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -187,7 +187,7 @@ static std::vector<StringRef> tokenizeExpr(StringRef s) {
       break;
     }

-    // Get a token before the opreator.
+    // Get a token before the operator.
     if (e != 0)
       ret.push_back(s.substr(0, e));

diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index cb7ee8829e53..42c8a71c4185 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -31,7 +31,18 @@ static std::string demangle(StringRef symName) {
   return std::string(symName);
 }

-std::string toString(const elf::Symbol &b) { return demangle(b.getName()); }
+std::string toString(const elf::Symbol &sym) {
+  StringRef name = sym.getName();
+  std::string ret = demangle(name);
+
+  // If sym has a non-default version, its name may have been truncated at '@'
+  // by Symbol::parseSymbolVersion(). Add the trailing part. This check is safe
+  // because every symbol name ends with '\0'.
+  if (name.data()[name.size()] == '@')
+    ret += name.data() + name.size();
+  return ret;
+}
+
 std::string toELFString(const Archive::Symbol &b) {
   return demangle(b.getName());
 }
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index ac606198afd8..ebee4af1fdad 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -21,6 +21,7 @@
 #include "llvm/Object/ELF.h"

 namespace lld {
+// Returns a string representation for a symbol for diagnostics.
 std::string toString(const elf::Symbol &);

 // There are two different ways to convert an Archive::Symbol to a string:
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 48339099dc27..8cf813ceffd0 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2151,7 +2151,7 @@ template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *buf) {
     eSym->st_size = sym->getSize();

     // st_value is usually an address of a symbol, but that has a
-    // special meaining for uninstantiated common symbols (this can
+    // special meaning for uninstantiated common symbols (this can
     // occur if -r is given).
     if (BssSection *commonSec = getCommonSec(ent.sym))
       eSym->st_value = commonSec->alignment;
@@ -2250,7 +2250,7 @@ size_t SymtabShndxSection::getSize() const {
 // DSOs. That means resolving all dynamic symbols takes O(m)*O(n)
 // where m is the number of DSOs and n is the number of dynamic
 // symbols. For modern large programs, both m and n are large. So
-// making each step faster by using hash tables substiantially
+// making each step faster by using hash tables substantially
 // improves time to load programs.
//
// (Note that this is not the only way to design the shared library.
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 190a4fd3ac9e..05eee24ce34f 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -364,7 +364,7 @@ class MipsGotSection final : public SyntheticSection {

   // Try to merge two GOTs. In case of success the `Dst` contains
   // result of merging and the function returns true. In case of
-  // ovwerflow the `Dst` is unchanged and the function returns false.
+  // overflow the `Dst` is unchanged and the function returns false.
   bool tryMergeGots(FileGot & dst, FileGot & src, bool isPrimary);
 };

diff --git a/lld/docs/ELF/linker_script.rst b/lld/docs/ELF/linker_script.rst
index 5b904bb3a1e1..c5115c1c9d6f 100644
--- a/lld/docs/ELF/linker_script.rst
+++ b/lld/docs/ELF/linker_script.rst
@@ -51,3 +51,27 @@ sh_addralign of an *OutputSection* *S* is the maximum of

 When an *OutputSection* *S* has both ``address`` and ``ALIGN(section_align)``,
 GNU ld will set sh_addralign to ``ALIGN(section_align)``.
+
+Output section LMA
+------------------
+
+A load address (LMA) can be specified by ``AT(lma)`` or ``AT>lma_region``.
+
+- ``AT(lma)`` specifies the exact load address. If the linker script does not
+  have a PHDRS command, then a new loadable segment will be generated.
+- ``AT>lma_region`` specifies the LMA region. The lack of ``AT>lma_region``
+  means the default region is used. Note that GNU ld propagates the previous
+  LMA memory region when ``address`` is not specified. The LMA is set to the
+  current location of the memory region, aligned to the section alignment.
+  If the linker script does not have a PHDRS command and ``lma_region`` is
+  different from the previous OutputSection's ``lma_region``, a new loadable
+  segment will be generated.
+
+The two keywords cannot be specified at the same time.
+
+If neither ``AT(lma)`` nor ``AT>lma_region`` is specified:
+
+- If the previous section is also in the default LMA region, the difference
+  between the LMA and the VMA is computed to be the same as the previous
+  difference.
+- Otherwise, the LMA is set to the VMA.
diff --git a/lld/include/lld/Core/Reference.h b/lld/include/lld/Core/Reference.h
index 4769882cde50..b104f8495474 100644
--- a/lld/include/lld/Core/Reference.h
+++ b/lld/include/lld/Core/Reference.h
@@ -91,7 +91,7 @@ class Reference {
   /// Some relocations require a symbol and a value (e.g. foo + 4).
   virtual Addend addend() const = 0;

-  /// During linking, some optimzations may change addend value.
+  /// During linking, some optimizations may change addend value.
   virtual void setAddend(Addend) = 0;

   /// Returns target specific attributes of the reference.
diff --git a/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp
index a424edf4985a..bee081aec067 100644
--- a/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp
+++ b/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp
@@ -61,7 +61,7 @@ class ArchHandler_arm64 : public ArchHandler {

   /// Used by GOTPass to update GOT References.
  void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override {
-    // If GOT slot was instanciated, transform:
+    // If GOT slot was instantiated, transform:
     //   gotPage21/gotOffset12 -> page21/offset12scale8
     // If GOT slot optimized away, transform:
     //   gotPage21/gotOffset12 -> page21/addOffset12
diff --git a/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp
index 94a105a6f159..f3636feb217b 100644
--- a/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp
@@ -576,5 +576,5 @@ void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) {
   pm.add(std::make_unique<CompactUnwindPass>(ctx));
 }

-} // end namesapce mach_o
-} // end namesapce lld
+} // end namespace mach_o
+} // end namespace lld
diff --git a/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/lld/lib/ReaderWriter/MachO/GOTPass.cpp
index 0f80dfa19d09..10e611c1bd2b 100644
--- a/lld/lib/ReaderWriter/MachO/GOTPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/GOTPass.cpp
@@ -179,5 +179,5 @@ void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx) {
   pm.add(std::make_unique<GOTPass>(ctx));
 }

-} // end namesapce mach_o
-} // end namesapce lld
+} // end namespace mach_o
+} // end namespace lld
diff --git a/lld/lib/ReaderWriter/MachO/TLVPass.cpp b/lld/lib/ReaderWriter/MachO/TLVPass.cpp
index 5f457b863d90..e0a031cfb07b 100644
--- a/lld/lib/ReaderWriter/MachO/TLVPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/TLVPass.cpp
@@ -136,5 +136,5 @@ void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx) {
   pm.add(std::make_unique<TLVPass>(ctx));
 }

-} // end namesapce mach_o
-} // end namesapce lld
+} // end namespace mach_o
+} // end namespace lld
diff --git a/lld/test/COFF/secidx-absolute.s b/lld/test/COFF/secidx-absolute.s
index 0b467bbb09bf..8befaf2f456c 100644
--- a/lld/test/COFF/secidx-absolute.s
+++ b/lld/test/COFF/secidx-absolute.s
@@ -3,7 +3,7 @@
 # RUN: lld-link -entry:main -nodefaultlib %t.obj -out:%t.exe
 # RUN: llvm-readobj %t.exe -sections -section-data | FileCheck %s

-# Section relocations against absolute symbols resolve to the last real ouput
+# Section relocations against absolute symbols resolve to the last real output
 # section index plus one.

 .text
diff --git a/lld/test/ELF/Inputs/undef-bad-debug.s b/lld/test/ELF/Inputs/undef-bad-debug.s
index bf517f3ea1cd..d3171f023616 100644
--- a/lld/test/ELF/Inputs/undef-bad-debug.s
+++ b/lld/test/ELF/Inputs/undef-bad-debug.s
@@ -16,7 +16,7 @@ sym4:
     .long .Lprologue_end - .Lprologue_start # prologue length
 .Lprologue_start:
     .byte 1 # minimum instruction length
-    .byte 1 # maximum operatiosn per instruction
+    .byte 1 # maximum operations per instruction
     .byte 1 # default is_stmt
     .byte -5 # line base
     .byte 14 # line range
@@ -59,7 +59,7 @@ sym4:
     .long .Lprologue2_end - .Lprologue2_start # prologue length
 .Lprologue2_start:
     .byte 1 # minimum instruction length
-    .byte 1 # maximum operatiosn per instruction
+    .byte 1 # maximum operations per instruction
     .byte 1 # default is_stmt
     .byte -5 # line base
     .byte 14 # line range
diff --git a/lld/test/ELF/allow-multiple-definition.s b/lld/test/ELF/allow-multiple-definition.s
index 8a3b97c0102e..bcf5cc7213c3 100644
--- a/lld/test/ELF/allow-multiple-definition.s
+++ b/lld/test/ELF/allow-multiple-definition.s
@@ -14,7 +14,7 @@
 # RUN: llvm-objdump -d %t3 | FileCheck %s
 # RUN: llvm-objdump -d %t4 | FileCheck --check-prefix=REVERT %s

-# inputs contain different constants for instuction movl.
+# inputs contain different constants for instruction movl.
# Tests below checks that order of files in command line # affects on what symbol will be used. # If flag allow-multiple-definition is enabled the first diff --git a/lld/test/ELF/arm-exidx-add-missing.s b/lld/test/ELF/arm-exidx-add-missing.s index 1beaa299a82e..da3c271f2355 100644 --- a/lld/test/ELF/arm-exidx-add-missing.s +++ b/lld/test/ELF/arm-exidx-add-missing.s @@ -10,7 +10,7 @@ // The range of addresses covered by the table entry is terminated by the // next table entry. This means that an executable section without a .ARM.exidx // section does not terminate the range of addresses. To fix this the linker -// synthesises an EXIDX_CANTUNWIND entry for each section wihout a .ARM.exidx +// synthesises an EXIDX_CANTUNWIND entry for each section without a .ARM.exidx // section. .syntax unified diff --git a/lld/test/ELF/executable-undefined-ignoreall.s b/lld/test/ELF/executable-undefined-ignoreall.s index a479317c0f71..cc38e17cdf61 100644 --- a/lld/test/ELF/executable-undefined-ignoreall.s +++ b/lld/test/ELF/executable-undefined-ignoreall.s @@ -1,7 +1,7 @@ # REQUIRES: x86 ## --unresolved-symbols=ignore-all behaves similar to -shared: -## for PLT relocations to undefined symbols, produce dynamic reloctions if we +## for PLT relocations to undefined symbols, produce dynamic relocations if we ## emit .dynsym. # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o diff --git a/lld/test/ELF/icf-absolute2.s b/lld/test/ELF/icf-absolute2.s index 37e26a9d9c46..5cf1d69bdecf 100644 --- a/lld/test/ELF/icf-absolute2.s +++ b/lld/test/ELF/icf-absolute2.s @@ -4,7 +4,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/icf-absolute2.s -o %t2 # RUN: ld.lld %t %t2 -o /dev/null --icf=all --print-icf-sections | FileCheck -allow-empty %s -## Test we do not crash and do not fold sections which relocations reffering to +## Test we do not crash and do not fold sections which relocations referring to ## absolute symbols with a different values. # CHECK-NOT: selected diff --git a/lld/test/ELF/invalid/broken-relaxation-x64.test b/lld/test/ELF/invalid/broken-relaxation-x64.test index 1bd7fe0cc36d..97a977e2c03a 100644 --- a/lld/test/ELF/invalid/broken-relaxation-x64.test +++ b/lld/test/ELF/invalid/broken-relaxation-x64.test @@ -8,7 +8,7 @@ ## YAML below contains 2 relocations of type R_X86_64_GOTTPOFF, and a .text ## with fake content filled by 0xFF. That means instructions for relaxation are ## "broken", so they does not match any known valid relaxations. We also generate -## .tls section because we need it for correct proccessing of STT_TLS symbol. +## .tls section because we need it for correct processing of STT_TLS symbol. !ELF FileHeader: Class: ELFCLASS64 @@ -44,4 +44,4 @@ Symbols: Value: 0x12345 Size: 4 Binding: STB_GLOBAL - \ No newline at end of file + diff --git a/lld/test/ELF/linkerscript/align1.test b/lld/test/ELF/linkerscript/align1.test index e7b65fc75ffe..62dfdbfc5bd9 100644 --- a/lld/test/ELF/linkerscript/align1.test +++ b/lld/test/ELF/linkerscript/align1.test @@ -34,7 +34,7 @@ SECTIONS { # RUN: ld.lld -o %t5 --script %t.script %t.o # RUN: llvm-objdump --section-headers %t5 | FileCheck %s --check-prefix=ZERO -## Test we fail gracefuly when alignment value is not a power of 2 (#1). +## Test we fail gracefully when alignment value is not a power of 2 (#1). # RUN: echo "SECTIONS { . = 0x123; . 
= ALIGN(0x123, 3); .aaa : { *(.aaa) } }" > %t.script # RUN: not ld.lld -o /dev/null --script %t.script %t.o 2>&1 | FileCheck -check-prefix=ERR %s diff --git a/lld/test/ELF/linkerscript/at4.s b/lld/test/ELF/linkerscript/at4.s deleted file mode 100644 index a52a33e5cee4..000000000000 --- a/lld/test/ELF/linkerscript/at4.s +++ /dev/null @@ -1,28 +0,0 @@ -# REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o -# RUN: echo "SECTIONS { \ -# RUN: . = 0x1000; \ -# RUN: .aaa : { *(.aaa) } \ -# RUN: .bbb : AT(0x2008) { *(.bbb) } \ -# RUN: .ccc : { *(.ccc) } \ -# RUN: }" > %t.script -# RUN: ld.lld %t.o --script %t.script -o %t -# RUN: llvm-readelf -l %t | FileCheck %s - -# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# CHECK-NEXT: LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000008 0x000008 R 0x1000 -# CHECK-NEXT: LOAD 0x001008 0x0000000000001008 0x0000000000002008 0x000010 0x000010 R 0x1000 -# CHECK-NEXT: LOAD 0x001018 0x0000000000001018 0x0000000000001018 0x000001 0x000001 R E 0x1000 - -.global _start -_start: - nop - -.section .aaa, "a" -.quad 0 - -.section .bbb, "a" -.quad 0 - -.section .ccc, "a" -.quad 0 diff --git a/lld/test/ELF/linkerscript/lma-offset.s b/lld/test/ELF/linkerscript/lma-offset.s new file mode 100644 index 000000000000..3c739724538e --- /dev/null +++ b/lld/test/ELF/linkerscript/lma-offset.s @@ -0,0 +1,39 @@ +# REQUIRES: x86 +## Test the difference between the VMA and the LMA for sections with AT(). + +# RUN: echo '.globl _start; _start: ret; \ +# RUN: .section .a,"a"; .byte 0; \ +# RUN: .section .b,"a"; .byte 0; \ +# RUN: .section .c,"a"; .byte 0; \ +# RUN: .section .d,"a"; .byte 0; \ +# RUN: .data; .byte 0' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t.o +# RUN: ld.lld -T %s %t.o -o %t +# RUN: llvm-readelf -l %t | FileCheck %s + +# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000001 0x000001 R 0x1000 + +## .b has AT(). It starts a PT_LOAD segment which also includes .c +# CHECK-NEXT: LOAD 0x001001 0x0000000000001001 0x0000000000002005 0x000002 0x000002 R 0x1000 + +## .d has AT(). It starts a PT_LOAD segment, even if the difference between +## LMA and VMA (0x2007-0x1003) is the same as the previous one. +# CHECK-NEXT: LOAD 0x001003 0x0000000000001003 0x0000000000002007 0x000001 0x000001 R 0x1000 + +## The orphan section .text starts a PT_LOAD segment. The difference between +## LMA and VMA (0x2008-0x1004) remains the same +# CHECK-NEXT: LOAD 0x001004 0x0000000000001004 0x0000000000002008 0x000001 0x000001 R E 0x1000 + +## .data starts a PT_LOAD segment. The difference remains the same. +# CHECK-NEXT: LOAD 0x001005 0x0000000000001005 0x0000000000002009 0x000001 0x000001 RW 0x1000 + +SECTIONS { + . = 0x1000; + .a : { *(.a) } + .b : AT(0x2005) { *(.b) } + .c : { *(.c) } + .d : AT(0x2007) { *(.d) } + ## Orphan section .text will be inserted here. 
+ .data : { *(.data) } +} diff --git a/lld/test/ELF/linkerscript/loadaddr.s b/lld/test/ELF/linkerscript/loadaddr.s index e2c82fc6c8cb..055b7422baeb 100644 --- a/lld/test/ELF/linkerscript/loadaddr.s +++ b/lld/test/ELF/linkerscript/loadaddr.s @@ -22,7 +22,7 @@ # CHECK-NEXT: 0000000000002008 g *ABS* 0000000000000000 bbb_lma # CHECK-NEXT: 0000000000003000 g *ABS* 0000000000000000 ccc_lma # CHECK-NEXT: 0000000000004000 g *ABS* 0000000000000000 ddd_lma -# CHECK-NEXT: 0000000000001020 g *ABS* 0000000000000000 txt_lma +# CHECK-NEXT: 0000000000004008 g *ABS* 0000000000000000 txt_lma # ERROR: {{.*}}.script:1: undefined section .zzz .global _start diff --git a/lld/test/ELF/linkerscript/map-file2.test b/lld/test/ELF/linkerscript/map-file2.test index f527e8ecdf80..535043282249 100644 --- a/lld/test/ELF/linkerscript/map-file2.test +++ b/lld/test/ELF/linkerscript/map-file2.test @@ -32,10 +32,10 @@ SECTIONS { # CHECK-NEXT: 1219 3209 8 1 {{.*}}{{/|\\}}map-file2.test.tmp.o:(.ddd) # CHECK-NEXT: 1228 3218 34 8 .eh_frame # CHECK-NEXT: 1228 3218 30 1 {{.*}}{{/|\\}}map-file2.test.tmp.o:(.eh_frame+0x0) -# CHECK-NEXT: 125c 125c 1 4 .text -# CHECK-NEXT: 125c 125c 1 4 {{.*}}{{/|\\}}map-file2.test.tmp.o:(.text) -# CHECK-NEXT: 125c 125c 0 1 f(int) -# CHECK-NEXT: 125c 125c 0 1 _start +# CHECK-NEXT: 125c 324c 1 4 .text +# CHECK-NEXT: 125c 324c 1 4 {{.*}}{{/|\\}}map-file2.test.tmp.o:(.text) +# CHECK-NEXT: 125c 324c 0 1 f(int) +# CHECK-NEXT: 125c 324c 0 1 _start # CHECK-NEXT: 0 0 8 1 .comment # CHECK-NEXT: 0 0 8 1 :(.comment) # CHECK-NEXT: 0 0 48 8 .symtab diff --git a/lld/test/ELF/linkerscript/overlay.test b/lld/test/ELF/linkerscript/overlay.test index 85e140d60ab0..2d3c88759c63 100644 --- a/lld/test/ELF/linkerscript/overlay.test +++ b/lld/test/ELF/linkerscript/overlay.test @@ -28,4 +28,4 @@ SECTIONS { # CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align # CHECK-NEXT: LOAD 0x001000 0x0000000000001000 0x0000000000004000 0x000008 0x000008 R 0x1000 # CHECK-NEXT: LOAD 0x002000 0x0000000000001000 0x0000000000004008 0x000004 0x000004 R 0x1000 -# CHECK-NEXT: LOAD 0x002008 0x0000000000001008 0x0000000000001008 0x000001 0x000001 R E 0x1000 +# CHECK-NEXT: LOAD 0x002008 0x0000000000001008 0x0000000000004010 0x000001 0x000001 R E 0x1000 diff --git a/lld/test/ELF/linkerscript/subalign.s b/lld/test/ELF/linkerscript/subalign.s index bf812d17bb87..a817a6993314 100644 --- a/lld/test/ELF/linkerscript/subalign.s +++ b/lld/test/ELF/linkerscript/subalign.s @@ -34,7 +34,7 @@ # RUN: ld.lld %t1.o --script %t4.script -o %t4 # RUN: llvm-objdump -s %t4 | FileCheck --check-prefix=SUBALIGN %s -## Test we fail gracefuly when alignment value is not a power of 2. +## Test we fail gracefully when alignment value is not a power of 2. # RUN: echo "SECTIONS { .aaa : SUBALIGN(3) { *(.aaa*) } }" > %t5.script # RUN: not ld.lld %t1.o --script %t5.script -o /dev/null 2>&1 | FileCheck --check-prefix=ERR2 %s # ERR2: {{.*}}.script:1: alignment must be power of 2 diff --git a/lld/test/ELF/lto/common4.ll b/lld/test/ELF/lto/common4.ll index 7a40e4be32fa..1b041667fb7b 100644 --- a/lld/test/ELF/lto/common4.ll +++ b/lld/test/ELF/lto/common4.ll @@ -2,7 +2,7 @@ ;; Make sure that common symbols are properly internalized. ;; In this file, @a does not interpose any symbol in a DSO, -;; so LTO should be able to internelize it. +;; so LTO should be able to internalize it. 
; RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux /dev/null -o %t.so.o ; RUN: ld.lld -shared -o %t.so %t.so.o diff --git a/lld/test/ELF/mips-sto-pic-flag.s b/lld/test/ELF/mips-sto-pic-flag.s index c313a8eb3c7f..6c9a660efca4 100644 --- a/lld/test/ELF/mips-sto-pic-flag.s +++ b/lld/test/ELF/mips-sto-pic-flag.s @@ -1,7 +1,7 @@ # REQUIRES: mips # In case of linking PIC and non-PIC code together and generation # of a relocatable object, all PIC symbols should have STO_MIPS_PIC -# flag in the symbol table of the ouput file. +# flag in the symbol table of the output file. # RUN: llvm-mc -filetype=obj -triple=mips-unknown-linux %s -o %t-npic.o # RUN: llvm-mc -filetype=obj -triple=mips-unknown-linux \ diff --git a/lld/test/ELF/pack-dyn-relocs.s b/lld/test/ELF/pack-dyn-relocs.s index 34c9a3cb315c..6ecf354f68a2 100644 --- a/lld/test/ELF/pack-dyn-relocs.s +++ b/lld/test/ELF/pack-dyn-relocs.s @@ -69,7 +69,7 @@ // ANDROID32-HEADERS: 0x6000000F ANDROID_REL [[ADDR]] // ANDROID32-HEADERS: 0x60000010 ANDROID_RELSZ [[SIZE]] -// Packed should have the groups of non-relative reloations first, followed +// Packed should have the groups of non-relative relocations first, followed // by the larger groups of relative relocations (i.e. the 8 and 9 followed // by the 7.) // ANDROID32: Section ({{.+}}) .rel.dyn { diff --git a/lld/test/ELF/ppc32-call-stub-pic.s b/lld/test/ELF/ppc32-call-stub-pic.s index 2c116c596e86..179874620f31 100644 --- a/lld/test/ELF/ppc32-call-stub-pic.s +++ b/lld/test/ELF/ppc32-call-stub-pic.s @@ -33,7 +33,7 @@ ## .got2+0x8000-0x10004 = 0x30000+0x8000-0x10004 = 65536*2+32764 # CHECK-LABEL: <_start>: -# CHECK-NEXT: bcl 20, 31, .+4 +# PIE-NEXT: bcl 20, 31, 0x10210 # PIE-NEXT: 10210: mflr 30 # PIE-NEXT: addis 30, 30, 3 # PIE-NEXT: addi 30, 30, -32412 @@ -52,6 +52,7 @@ # PIE-NEXT: bl 0x10274 ## bl 00008000.plt_pic32.f # PIE-NEXT: bl 0x10284 +# SHARED-NEXT: bcl 20, 31, 0x10230 # SHARED-NEXT: 10230: mflr 30 # SHARED-NEXT: addis 30, 30, 3 # SHARED-NEXT: addi 30, 30, -32420 @@ -116,9 +117,8 @@ ## Operand of addi: 0x100a8-.glink = 24 # CHECK-NEXT: addis 11, 11, 0 # CHECK-NEXT: mflr 0 -# CHECK-NEXT: bcl 20, 31, .+4 -# PIE-NEXT: 102ac: addi 11, 11, 24 -# SHARED-NEXT: 102cc: addi 11, 11, 24 +# CHECK-NEXT: bcl 20, 31, 0x[[#%x,NEXT:]] +# CHECK-NEXT: [[#%x,NEXT]]: addi 11, 11, 24 # CHECK-NEXT: mflr 12 # CHECK-NEXT: mtlr 0 diff --git a/lld/test/ELF/ppc32-long-thunk.s b/lld/test/ELF/ppc32-long-thunk.s index 57c442f7723d..90c284ded5fa 100644 --- a/lld/test/ELF/ppc32-long-thunk.s +++ b/lld/test/ELF/ppc32-long-thunk.s @@ -40,7 +40,7 @@ ## high-0x2028 = 0x02002008-0x2020 = 65536*512-24 # PI: <__LongThunk_high>: # PI-NEXT: 2018: mflr 0 -# PI-NEXT: bcl 20, 31, .+4 +# PI-NEXT: bcl 20, 31, 0x2020 # PI-NEXT: 2020: mflr 12 # PI-NEXT: addis 12, 12, 512 # PI-NEXT: addi 12, 12, -24 @@ -51,7 +51,7 @@ ## .text_high+16-0x2048 = 0x02002010-0x2048 = 65536*512-48 # PI: <__LongThunk_>: # PI-NEXT: 2038: mflr 0 -# PI-NEXT: bcl 20, 31, .+4 +# PI-NEXT: bcl 20, 31, 0x2040 # PI-NEXT: 2040: mflr 12 # PI-NEXT: addis 12, 12, 512 # PI-NEXT: addi 12, 12, -48 diff --git a/lld/test/ELF/ppc32-reloc-rel.s b/lld/test/ELF/ppc32-reloc-rel.s index 49d149550aae..b2bd0a461ca1 100644 --- a/lld/test/ELF/ppc32-reloc-rel.s +++ b/lld/test/ELF/ppc32-reloc-rel.s @@ -7,7 +7,7 @@ beq 1f 1: # CHECK-LABEL: section .R_PPC_REL14: -# CHECK: bt 2, .+4 +# CHECK: 100100b4: bt 2, 0x100100b8 .section .R_PPC_REL24,"ax",@progbits b 1f diff --git a/lld/test/ELF/ppc64-local-dynamic.s b/lld/test/ELF/ppc64-local-dynamic.s index d7013ee6d192..f0f3967387dd 100644 --- 
a/lld/test/ELF/ppc64-local-dynamic.s +++ b/lld/test/ELF/ppc64-local-dynamic.s @@ -95,7 +95,7 @@ k: // OutputRelocs-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend // OutputRelocs-NEXT: R_PPC64_DTPMOD64 -// Check that the got has 3 entries, 1 for the TOC and 1 stucture of 2 entries +// Check that the got has 3 entries, 1 for the TOC and 1 structure of 2 entries // for the tls variables. Also verify the address so we can check the offsets // we calculate for each relocation type. // CheckGot: got 00000018 0000000000020100 diff --git a/lld/test/ELF/ppc64-reloc-rel.s b/lld/test/ELF/ppc64-reloc-rel.s index be64a4f767ac..ea7367f38ca0 100644 --- a/lld/test/ELF/ppc64-reloc-rel.s +++ b/lld/test/ELF/ppc64-reloc-rel.s @@ -12,7 +12,7 @@ beq 1f 1: # CHECK-LABEL: Disassembly of section .R_PPC64_REL14: -# CHECK: bt 2, .+4 +# CHECK: bt 2, 0x10010198 .section .R_PPC64_REL16,"ax",@progbits .globl rel16 diff --git a/lld/test/ELF/ppc64-split-stack-adjust-overflow.s b/lld/test/ELF/ppc64-split-stack-adjust-overflow.s index b1a104474f10..f9bbf6176af5 100644 --- a/lld/test/ELF/ppc64-split-stack-adjust-overflow.s +++ b/lld/test/ELF/ppc64-split-stack-adjust-overflow.s @@ -59,6 +59,6 @@ caller: # CHECK-NEXT: addis 12, 1, -32768 # CHECK-NEXT: nop # CHECK-NEXT: cmpld 7, 12, 0 -# CHECK-NEXT: bt- 28, .+36 +# CHECK-NEXT: bt- 28, 0x10010204 .section .note.GNU-split-stack,"",@progbits diff --git a/lld/test/ELF/ppc64-split-stack-adjust-size-success.s b/lld/test/ELF/ppc64-split-stack-adjust-size-success.s index 63e0b414dc09..27fbb95c01df 100644 --- a/lld/test/ELF/ppc64-split-stack-adjust-size-success.s +++ b/lld/test/ELF/ppc64-split-stack-adjust-size-success.s @@ -58,21 +58,21 @@ caller: # CHECK-NEXT: addis 12, 1, -1 # CHECK-NEXT: addi 12, 12, 32736 # CHECK-NEXT: cmpld 7, 12, 0 -# CHECK-NEXT: bt- 28, .+36 +# CHECK-NEXT: bt- 28, 0x10010204 # SMALL-LABEL: caller # SMALL: ld 0, -28736(13) # SMALL-NEXT: addi 12, 1, -4128 # SMALL-NEXT: nop # SMALL-NEXT: cmpld 7, 12, 0 -# SMALL-NEXT: bt- 28, .+36 +# SMALL-NEXT: bt- 28, 0x10010204 # ZERO-LABEL: caller # ZERO: ld 0, -28736(13) # ZERO-NEXT: addi 12, 1, -32 # ZERO-NEXT: nop # ZERO-NEXT: cmpld 7, 12, 0 -# ZERO-NEXT: bt- 28, .+36 +# ZERO-NEXT: bt- 28, 0x10010204 .p2align 2 .global main .type main, @function diff --git a/lld/test/ELF/ppc64-split-stack-prologue-adjust-success.s b/lld/test/ELF/ppc64-split-stack-prologue-adjust-success.s index 197df150c495..002f80d5cb80 100644 --- a/lld/test/ELF/ppc64-split-stack-prologue-adjust-success.s +++ b/lld/test/ELF/ppc64-split-stack-prologue-adjust-success.s @@ -16,7 +16,7 @@ # A caller with a stack that is small enough that the addis instruction # from the split-stack prologue is unneeded, and after the prologue adjustment -# the stack size still fits whithin 16 bits. +# the stack size still fits within 16 bits. .p2align 2 .global caller_small_stack .type caller_small_stack, @function @@ -54,9 +54,9 @@ caller_small_stack: # CHECK-NEXT: addi 12, 1, -16416 # CHECK-NEXT: nop # CHECK-NEXT: cmpld 7, 12, 0 -# CHECK-NEXT: bt- 28, .+36 +# CHECK-NEXT: bt- 28, 0x10010204 -# A caller that has a stack size that fits whithin 16 bits, but the adjusted +# A caller that has a stack size that fits within 16 bits, but the adjusted # stack size after prologue adjustment now overflows 16 bits needing both addis # and addi instructions. 
.p2align 2 @@ -132,7 +132,7 @@ caller_large_stack: # CHECK-NEXT: addis 12, 1, -1 # CHECK-NEXT: addi 12, 12, -16416 # CHECK-NEXT: cmpld 7, 12, 0 -# CHECK-NEXT: bt- 28, .+44 +# CHECK-NEXT: bt- 28, 0x100102bc # A caller with a stack size that is larger then 16 bits, but aligned such that # the addi instruction is unneeded. @@ -174,7 +174,7 @@ caller_large_aligned_stack: # CHECK-NEXT: addis 12, 1, -2 # CHECK-NEXT: addi 12, 12, -16384 # CHECK-NEXT: cmpld 7, 12, 0 -# CHECK-NEXT: bt- 28, .+40 +# CHECK-NEXT: bt- 28, 0x10010318 # main only calls split-stack functions or __morestack so # there should be no adjustment of its split-stack prologue. diff --git a/lld/test/ELF/threads.s b/lld/test/ELF/threads.s index 6d4b36924031..7a9eeec00e08 100644 --- a/lld/test/ELF/threads.s +++ b/lld/test/ELF/threads.s @@ -1,3 +1,4 @@ +# REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o ## A positive integer is allowed. diff --git a/lld/test/ELF/undef-suggest-version.s b/lld/test/ELF/undef-suggest-version.s new file mode 100644 index 000000000000..790b9fcab788 --- /dev/null +++ b/lld/test/ELF/undef-suggest-version.s @@ -0,0 +1,57 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: echo 'v1 {bar;};' > %t.ver +# RUN: ld.lld -shared --version-script %t.ver %t.o -o %t.so + +## For an unversioned undefined symbol, check we can suggest the symbol with the +## default version. +# RUN: echo 'call bat' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef1.o +# RUN: not ld.lld %t.so %tdef1.o -o /dev/null 2>&1 | FileCheck --check-prefix=DEFAULT1 %s + +# DEFAULT1: error: undefined symbol: bat +# DEFAULT1-NEXT: >>> referenced by {{.*}}.o:(.text+0x1) +# DEFAULT1-NEXT: >>> did you mean: bar{{$}} +# DEFAULT1-NEXT: >>> defined in: {{.*}}.so + +## For a versioned undefined symbol, check we can suggest the symbol with the +## default version. +# RUN: echo '.symver bar.v2,bar@v2; call bar.v2' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef2.o +# RUN: not ld.lld %t.so %tdef2.o -o /dev/null 2>&1 | FileCheck --check-prefix=DEFAULT2 %s + +# DEFAULT2: error: undefined symbol: bar@v2 +# DEFAULT2-NEXT: >>> referenced by {{.*}}.o:(.text+0x1) +# DEFAULT2-NEXT: >>> did you mean: bar{{$}} +# DEFAULT2-NEXT: >>> defined in: {{.*}}.so + +## For an unversioned undefined symbol, check we can suggest a symbol with +## a non-default version. +# RUN: echo 'call foo; call _Z3fooi' | llvm-mc -filetype=obj -triple=x86_64 - -o %thidden1.o +# RUN: not ld.lld %t.so %thidden1.o -o /dev/null 2>&1 | FileCheck --check-prefix=HIDDEN1 %s + +# HIDDEN1: error: undefined symbol: foo +# HIDDEN1-NEXT: >>> referenced by {{.*}}.o:(.text+0x1) +# HIDDEN1-NEXT: >>> did you mean: foo@v1 +# HIDDEN1-NEXT: >>> defined in: {{.*}}.so +# HIDDEN1-EMPTY: +# HIDDEN1-NEXT: error: undefined symbol: foo(int) +# HIDDEN1-NEXT: >>> referenced by {{.*}}.o:(.text+0x6) +# HIDDEN1-NEXT: >>> did you mean: foo(int)@v1 +# HIDDEN1-NEXT: >>> defined in: {{.*}}.so + +## For a versioned undefined symbol, check we can suggest a symbol with +## a different version. +# RUN: echo '.symver foo.v2,foo@v2; call foo.v2' | llvm-mc -filetype=obj -triple=x86_64 - -o %thidden2.o +# RUN: not ld.lld %t.so %thidden2.o -o /dev/null 2>&1 | FileCheck --check-prefix=HIDDEN2 %s + +# HIDDEN2: error: undefined symbol: foo@v2 +# HIDDEN2-NEXT: >>> referenced by {{.*}}.o:(.text+0x1) +# HIDDEN2-NEXT: >>> did you mean: foo@v1 +# HIDDEN2-NEXT: >>> defined in: {{.*}}.so + +## %t.so exports bar@@v1 and two VERSYM_HIDDEN symbols: foo@v1 and _Z3fooi@v1. 
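The expectations in undef-suggest-version.s above encode a simple matching rule: an undefined foo or foo@v2 is paired with the defined foo@v1 by comparing names with any @version suffix stripped (the bat to bar suggestion additionally relies on lld's edit-distance typo correction, which is not modeled here). A rough Python model of the version-aware half of the rule, illustrative only and not lld's implementation:

def suggest(undef, defined):
    # The undefined name and the candidate match on the part before
    # any '@version' suffix.
    base = undef.split('@')[0]
    for sym in defined:
        if sym.split('@')[0] == base:
            return 'did you mean: ' + sym
    return None

print(suggest('foo', ['bar', 'foo@v1']))     # did you mean: foo@v1
print(suggest('foo@v2', ['bar', 'foo@v1']))  # did you mean: foo@v1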
+.globl foo.v1, _Z3fooi.v1, bar +.symver foo.v1,foo@v1 +.symver _Z3fooi.v1,_Z3fooi@v1 +foo.v1: +_Z3fooi.v1: +bar: diff --git a/lld/test/ELF/warn-backrefs.s b/lld/test/ELF/warn-backrefs.s index 629bd3be2fd9..8a9ee2291ece 100644 --- a/lld/test/ELF/warn-backrefs.s +++ b/lld/test/ELF/warn-backrefs.s @@ -39,7 +39,7 @@ # RUN: echo ".globl foo; foo: call bar" | llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t4.o # RUN: ld.lld --fatal-warnings --warn-backrefs %t1.o --start-lib %t3.o %t4.o --end-lib -o /dev/null -# We don't report backward references to weak symbols as they can be overriden later. +# We don't report backward references to weak symbols as they can be overridden later. # RUN: echo ".weak foo; foo:" | llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t5.o # RUN: ld.lld --fatal-warnings --warn-backrefs --start-lib %t5.o --end-lib %t1.o %t2.o -o /dev/null diff --git a/lld/test/mach-o/error-simulator-vs-macosx.yaml b/lld/test/mach-o/error-simulator-vs-macosx.yaml index 609eb3be43ab..fcf1774d760a 100644 --- a/lld/test/mach-o/error-simulator-vs-macosx.yaml +++ b/lld/test/mach-o/error-simulator-vs-macosx.yaml @@ -1,7 +1,7 @@ # RUN: ld64.lld -arch i386 -macosx_version_min 10.8 %s %p/Inputs/hello-world-x86.yaml -o %t && llvm-nm -m %t | FileCheck %s # RUN: not ld64.lld -arch i386 -ios_simulator_version_min 5.0 %s %p/Inputs/hello-world-x86.yaml -o %t 2>&1 | FileCheck %s --check-prefix=ERROR # -# Test that i386 can link with a macos version but gives an error with a simululator version. +# Test that i386 can link with a macos version but gives an error with a simulator version. # --- !mach-o diff --git a/lld/test/mach-o/parse-literals-error.yaml b/lld/test/mach-o/parse-literals-error.yaml index 9dad0cbbf974..de8b47c53047 100644 --- a/lld/test/mach-o/parse-literals-error.yaml +++ b/lld/test/mach-o/parse-literals-error.yaml @@ -1,7 +1,7 @@ # RUN: not ld64.lld -arch x86_64 -r -print_atoms %s -o %t 2> %t.err # RUN: FileCheck %s < %t.err # -# Test for error if literal section is not correct size mulitple. +# Test for error if literal section is not correct size multiple. # --- !mach-o diff --git a/lld/test/wasm/early-exit-for-bad-paths.s b/lld/test/wasm/early-exit-for-bad-paths.s new file mode 100644 index 000000000000..2866bfa62f86 --- /dev/null +++ b/lld/test/wasm/early-exit-for-bad-paths.s @@ -0,0 +1,22 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o + +# RUN: not wasm-ld %t.o -o does_not_exist/output 2>&1 | \ +# RUN: FileCheck %s -check-prefixes=NO-DIR-OUTPUT,CHECK +# RUN: not wasm-ld %t.o -o %s/dir_is_a_file 2>&1 | \ +# RUN: FileCheck %s -check-prefixes=DIR-IS-OUTPUT,CHECK +# TODO(sbc): check similar check for -Map file once we add that option + +# NO-DIR-OUTPUT: error: cannot open output file does_not_exist/output: +# DIR-IS-OUTPUT: error: cannot open output file {{.*}}/dir_is_a_file: + +# We should exit before doing the actual link. If an undefined symbol error is +# discovered we haven't bailed out early as expected. +# CHECK-NOT: undefined_symbol + +# RUN: not wasm-ld %t.o -o / 2>&1 | FileCheck %s -check-prefixes=ROOT,CHECK +# ROOT: error: cannot open output file / + +_start: + .functype _start () -> () + call undefined_symbol + end_function diff --git a/lld/test/wasm/entry-signature.ll b/lld/test/wasm/entry-signature.ll index 8e245b14e964..f7f3d481acfc 100644 --- a/lld/test/wasm/entry-signature.ll +++ b/lld/test/wasm/entry-signature.ll @@ -1,4 +1,4 @@ -; Verify that the entry point signauture can be flexible. 
+; Verify that the entry point signature can be flexible.
 ; RUN: llc -filetype=obj %s -o %t.o
 ; RUN: wasm-ld -o %t1.wasm %t.o
diff --git a/lld/test/wasm/export-optional-lazy.ll b/lld/test/wasm/export-optional-lazy.ll
index 960e71c6ae6c..c37a3e5183eb 100644
--- a/lld/test/wasm/export-optional-lazy.ll
+++ b/lld/test/wasm/export-optional-lazy.ll
@@ -1,7 +1,7 @@
 ; Optional linker-synthetic symbols are only created if they are undefined
 ; in the final output.
 ; This test is for a regression where an explicit --export of an lazy archive
-; symbol caused an undefined referece to an optional symbol to occur *after*
+; symbol caused an undefined reference to an optional symbol to occur *after*
 ; the optional symbols were created.

 ; RUN: llc -filetype=obj %s -o %t.o
diff --git a/lld/test/wasm/lto/incompatible.ll b/lld/test/wasm/lto/incompatible.ll
index 6f7c154e959c..335756420639 100644
--- a/lld/test/wasm/lto/incompatible.ll
+++ b/lld/test/wasm/lto/incompatible.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: x86
 ; RUN: llvm-as %s -o %t.bc
-; RUN: not wasm-ld %t.bc -o out.wasm 2>&1 | FileCheck %s
+; RUN: not wasm-ld %t.bc -o %t.wasm 2>&1 | FileCheck %s

 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/lld/test/wasm/lto/opt-level.ll b/lld/test/wasm/lto/opt-level.ll
index f6156e725c7d..d2d552728b16 100644
--- a/lld/test/wasm/lto/opt-level.ll
+++ b/lld/test/wasm/lto/opt-level.ll
@@ -11,7 +11,7 @@
 ; RUN:   FileCheck --check-prefix=INVALID %s
 ; INVALID: invalid optimization level for LTO: 6

-; RUN: not wasm-ld -o %t3 -m elf_x86_64 -e main --lto-O-1 %t.o 2>&1 | \
+; RUN: not wasm-ld -o %t3 -e main --lto-O-1 %t.o 2>&1 | \
 ; RUN:   FileCheck --check-prefix=INVALIDNEGATIVE %s
 ; INVALIDNEGATIVE: invalid optimization level for LTO: 4294967295

diff --git a/lld/test/wasm/lto/signature-mismatch.ll b/lld/test/wasm/lto/signature-mismatch.ll
index e12d91866023..cf1a998826fc 100644
--- a/lld/test/wasm/lto/signature-mismatch.ll
+++ b/lld/test/wasm/lto/signature-mismatch.ll
@@ -3,7 +3,7 @@
 ; RUN: not wasm-ld --fatal-warnings %t.o %t1.o -o %t.wasm 2>&1 | FileCheck %s

 ; Test that functions defined in bitcode correctly report signature
-; mistmaches with existing undefined sybmols in normal objects.
+; mismatches with existing undefined symbols in normal objects.

 target triple = "wasm32-unknown-unknown"

diff --git a/lld/test/wasm/lto/undef.ll b/lld/test/wasm/lto/undef.ll
index 65e8e4642d5e..a5477cb3c3bd 100644
--- a/lld/test/wasm/lto/undef.ll
+++ b/lld/test/wasm/lto/undef.ll
@@ -8,7 +8,7 @@ target triple = "wasm32-unknown-unknown"
 declare i32 @bar()

 ; Symbols such as foo which are only called indirectly are handled slightly
-; differently with resepect to signature checking.
+; differently with respect to signature checking.
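One detail worth calling out in opt-level.ll above: --lto-O-1 is diagnosed as level 4294967295 because the option value ends up in an unsigned 32-bit integer, so -1 wraps around before the range check. A hypothetical re-implementation of just that check in Python, not lld's actual parser:

def parse_lto_level(text):
    # The level is held as an unsigned 32-bit value, so negative inputs
    # wrap around before the range check runs.
    level = int(text) % 2**32
    if level > 3:
        raise ValueError('invalid optimization level for LTO: %d' % level)
    return level

# parse_lto_level('6')   -> ValueError: invalid optimization level for LTO: 6
# parse_lto_level('-1')  -> ValueError: invalid optimization level for LTO: 4294967295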
 declare i32 @foo()
 @ptr = global i8* bitcast (i32 ()* @foo to i8*), align 8
diff --git a/lld/test/wasm/responsefile.test b/lld/test/wasm/responsefile.test
index ba2afd0cc8fd..85ac41f93f2b 100644
--- a/lld/test/wasm/responsefile.test
+++ b/lld/test/wasm/responsefile.test
@@ -10,11 +10,11 @@ RUN: FileCheck --check-prefix=INVRSP %s
 INVRSP: invalid response file quoting: foobar

 RUN: echo "blah\foo" > %t.rsp
-RUN: not wasm-ld --rsp-quoting=windows @%t.rsp 2>&1 | \
+RUN: not wasm-ld -o a.out --rsp-quoting=windows @%t.rsp 2>&1 | \
 RUN: FileCheck --check-prefix=WINRSP %s
 WINRSP: error: cannot open blah\foo:

 RUN: echo "blah\foo" > %t.rsp
-RUN: not wasm-ld --rsp-quoting=posix @%t.rsp 2>&1 | \
+RUN: not wasm-ld -o a.out --rsp-quoting=posix @%t.rsp 2>&1 | \
 RUN: FileCheck --check-prefix=POSRSP %s
 POSRSP: error: cannot open blahfoo:

diff --git a/lld/test/wasm/signature-mismatch-unknown.ll b/lld/test/wasm/signature-mismatch-unknown.ll
index b354ed12207d..9bbad0065dcd 100644
--- a/lld/test/wasm/signature-mismatch-unknown.ll
+++ b/lld/test/wasm/signature-mismatch-unknown.ll
@@ -4,7 +4,7 @@
 ; RUN: wasm-ld --fatal-warnings -o %t.wasm %t.main.o %t.ret32.o

 ; Also test the case where there are two different object files that contains
-; referneces ret32:
+; references ret32:
 ; %t.main.o: Does not call ret32 directly; used the wrong signature.
 ; %t.call-ret32.o: Calls ret32 directly; uses the correct signature.
 ; RUN: llc -filetype=obj %p/Inputs/call-ret32.ll -o %t.call-ret32.o
diff --git a/lld/test/wasm/undefined-entry.test b/lld/test/wasm/undefined-entry.test
index a36212f2c9e3..3106a76510f4 100644
--- a/lld/test/wasm/undefined-entry.test
+++ b/lld/test/wasm/undefined-entry.test
@@ -3,7 +3,7 @@ RUN: not wasm-ld -o %t.wasm %t.ret32.o 2>&1 | FileCheck %s
 RUN: not wasm-ld --allow-undefined -o %t.wasm %t.ret32.o 2>&1 | FileCheck %s
 RUN: not wasm-ld -entry=foo -o %t.wasm %t.ret32.o 2>&1 | FileCheck %s -check-prefix=CHECK-CUSTOM

-CHECK: error: entry symbol not defined (pass --no-entry to supress): _start
-CHECK-CUSTOM: error: entry symbol not defined (pass --no-entry to supress): foo
+CHECK: error: entry symbol not defined (pass --no-entry to suppress): _start
+CHECK-CUSTOM: error: entry symbol not defined (pass --no-entry to suppress): foo

 RUN: wasm-ld --no-entry -o %t.wasm %t.ret32.o
diff --git a/lld/test/wasm/visibility-hidden.ll b/lld/test/wasm/visibility-hidden.ll
index f5731c4e964a..99acd5651f7b 100644
--- a/lld/test/wasm/visibility-hidden.ll
+++ b/lld/test/wasm/visibility-hidden.ll
@@ -3,12 +3,12 @@
 ; RUN: rm -f %t2.a
 ; RUN: llvm-ar rcs %t2.a %t2.o

-; Test that symbols with hidden visitiblity are not export, even with
+; Test that symbols with hidden visibility are not exported, even with
 ; --export-dynamic
 ; RUN: wasm-ld --export-dynamic %t.o %t2.a -o %t.wasm
 ; RUN: obj2yaml %t.wasm | FileCheck %s

-; Test that symbols with default visitiblity are not exported without
+; Test that symbols with default visibility are not exported without
 ; --export-dynamic
 ; RUN: wasm-ld %t.o %t2.a -o %t.nodef.wasm
 ; RUN: obj2yaml %t.nodef.wasm | FileCheck %s -check-prefix=NO-DEFAULT
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index d0b01d5ccef0..b6cd879e89c6 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -15,6 +15,7 @@
 #include "Writer.h"
 #include "lld/Common/Args.h"
 #include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Filesystem.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Reproduce.h"
 #include "lld/Common/Strings.h"
@@ -304,6 +305,8 @@ void LinkerDriver::createFiles(opt::InputArgList
&args) {
       break;
     }
   }
+  if (files.empty() && errorCount() == 0)
+    error("no input files");
 }

 static StringRef getEntry(opt::InputArgList &args) {
@@ -728,16 +731,27 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);

   readConfigs(args);
+
+  createFiles(args);
+  if (errorCount())
+    return;
+
   setConfigs();
   checkOptions(args);
+  if (errorCount())
+    return;

   if (auto *arg = args.getLastArg(OPT_allow_undefined_file))
     readImportFile(arg->getValue());

-  if (!args.hasArg(OPT_INPUT)) {
-    error("no input files");
+  // Fail early if the output file or map file is not writable. If a user has a
+  // long link, e.g. due to a large LTO link, they do not want to run it only to
+  // find that it failed because there was a mistake in their command line.
+  if (auto e = tryCreateFile(config->outputFile))
+    error("cannot open output file " + config->outputFile + ": " + e.message());
+  // TODO(sbc): add check for map file too once we add support for that.
+  if (errorCount())
     return;
-  }

   // Handle --trace-symbol.
   for (auto *arg : args.filtered(OPT_trace_symbol))
@@ -748,10 +762,6 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {

   createSyntheticSymbols();

-  createFiles(args);
-  if (errorCount())
-    return;
-
   // Add all files to the symbol table. This will add almost all
   // symbols that we need to the symbol table.
   for (InputFile *f : files)
@@ -774,7 +784,7 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   if (entrySym && entrySym->isDefined())
     entrySym->forceExport = true;
   else
-    error("entry symbol not defined (pass --no-entry to supress): " +
+    error("entry symbol not defined (pass --no-entry to suppress): " +
           config->entry);
 }

diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index dc095adf6dff..99d18178eeac 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -138,7 +138,7 @@ static void reportTypeError(const Symbol *existing, const InputFile *file,
 }

 // Check the type of new symbol matches that of the symbol is replacing.
-// Returns true if the function types match, false is there is a singature
+// Returns true if the function types match, false if there is a signature
 // mismatch.
 static bool signatureMatches(FunctionSymbol *existing,
                              const WasmSignature *newSig) {
@@ -279,7 +279,7 @@ Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
   std::tie(s, wasInserted) = insert(name, file);

   auto replaceSym = [&](Symbol *sym) {
-    // If the new defined function doesn't have signture (i.e. bitcode
+    // If the new defined function doesn't have signature (i.e. bitcode
     // functions) but the old symbol does, then preserve the old signature
     const WasmSignature *oldSig = s->getSignature();
     auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function);
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index 9803ad439f9c..522ea46b38bc 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -108,7 +108,7 @@ class SymbolTable {
   llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
   std::vector<Symbol *> symVector;

-  // For certain symbols types, e.g. function symbols, we allow for muliple
+  // For certain symbol types, e.g. function symbols, we allow for multiple
   // variants of the same symbol with different signatures.
  llvm::DenseMap<llvm::CachedHashStringRef, std::vector<Symbol *>> symVariants;

diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index 349fc651533e..7112db6b0826 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -156,7 +156,7 @@ void Symbol::setGOTIndex(uint32_t index) {
   LLVM_DEBUG(dbgs() << "setGOTIndex " << name << " -> " << index << "\n");
   assert(gotIndex == INVALID_INDEX);
   if (config->isPic) {
-    // Any symbol that is assigned a GOT entry must be exported othewise the
+    // Any symbol that is assigned a GOT entry must be exported otherwise the
     // dynamic linker won't be able create the entry that contains it.
     forceExport = true;
   }
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 11836e11f3ec..3400cde1c7e2 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -415,7 +415,7 @@ class LazySymbol : public Symbol {

   // Lazy symbols can have a signature because they can replace an
   // UndefinedFunction which which case we need to be able to preserve the
-  // signture.
+  // signature.
   // TODO(sbc): This repetition of the signature field is inelegant. Revisit
   // the use of class hierarchy to represent symbol taxonomy.
   const WasmSignature *signature = nullptr;
diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h
index d18027c7248c..dfff30029168 100644
--- a/lldb/include/lldb/Symbol/Type.h
+++ b/lldb/include/lldb/Symbol/Type.h
@@ -196,10 +196,11 @@ class Type : public std::enable_shared_from_this<Type>, public UserID {

   uint32_t GetEncodingMask();

+  typedef uint32_t Payload;
   /// Return the language-specific payload.
-  uint32_t GetPayload() { return m_payload; }
+  Payload GetPayload() { return m_payload; }
   /// Return the language-specific payload.
-  void SetPayload(uint32_t opaque_payload) { m_payload = opaque_payload; }
+  void SetPayload(Payload opaque_payload) { m_payload = opaque_payload; }

 protected:
   ConstString m_name;
@@ -215,7 +216,7 @@ class Type : public std::enable_shared_from_this<Type>, public UserID {
   CompilerType m_compiler_type;
   ResolveState m_compiler_type_resolve_state;
   /// Language-specific flags.
-  uint32_t m_payload;
+  Payload m_payload;

   Type *GetEncodingType();

diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py
index 966d460ea13d..5058594505f5 100644
--- a/lldb/packages/Python/lldbsuite/test/lldbtest.py
+++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py
@@ -2414,9 +2414,12 @@ def expect_expr(

         # Set the usual default options for normal expressions.
options.SetIgnoreBreakpoints(True) - options.SetLanguage(frame.GuessLanguage()) - eval_result = frame.EvaluateExpression(expr, options) + if self.frame().IsValid(): + options.SetLanguage(frame.GuessLanguage()) + eval_result = self.frame().EvaluateExpression(expr, options) + else: + eval_result = self.target().EvaluateExpression(expr, options) if not eval_result.GetError().Success(): self.assertTrue(eval_result.GetError().Success(), diff --git a/lldb/source/Host/common/Host.cpp b/lldb/source/Host/common/Host.cpp index b2485393cd6a..8a6af3881a0f 100644 --- a/lldb/source/Host/common/Host.cpp +++ b/lldb/source/Host/common/Host.cpp @@ -501,6 +501,8 @@ Status Host::RunShellCommand(const Args &args, const FileSpec &working_dir, launch_info.SetArguments(args, first_arg_is_executable); } + launch_info.GetEnvironment() = Host::GetEnvironment(); + if (working_dir) launch_info.SetWorkingDirectory(working_dir); llvm::SmallString<64> output_file_path; diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index eba3060f8ec6..045ba7f3671f 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -9,13 +9,9 @@ #include "lldb/Host/Host.h" #include +#include -// On device doesn't have supporty for XPC. -#if defined(__APPLE__) && (defined(__arm64__) || defined(__aarch64__)) -#define NO_XPC_SERVICES 1 -#endif - -#if !defined(NO_XPC_SERVICES) +#if TARGET_OS_OSX #define __XPC_PRIVATE_H__ #include @@ -135,6 +131,8 @@ return false; } +#if TARGET_OS_OSX + static void *AcceptPIDFromInferior(void *arg) { const char *connect_url = (const char *)arg; ConnectionFileDescriptor file_conn; @@ -153,8 +151,6 @@ return NULL; } -#if !defined(__arm__) && !defined(__arm64__) && !defined(__aarch64__) - const char *applscript_in_new_tty = "tell application \"Terminal\"\n" " activate\n" " do script \"/bin/bash -c '%s';exit\"\n" @@ -307,13 +303,13 @@ repeat with the_window in (get windows)\n\ return error; } -#endif // #if !defined(__arm__) && !defined(__arm64__) && !defined(__aarch64__) +#endif // TARGET_OS_OSX bool Host::OpenFileInExternalEditor(const FileSpec &file_spec, uint32_t line_no) { -#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) +#if !TARGET_OS_OSX return false; -#else +#else // !TARGET_OS_OSX // We attach this to an 'odoc' event to specify a particular selection typedef struct { int16_t reserved0; // must be zero @@ -404,7 +400,7 @@ repeat with the_window in (get windows)\n\ } return true; -#endif // #if !defined(__arm__) && !defined(__arm64__) && !defined(__aarch64__) +#endif // TARGET_OS_OSX } Environment Host::GetEnvironment() { return Environment(*_NSGetEnviron()); } @@ -689,7 +685,7 @@ static bool GetMacOSXProcessUserAndGroup(ProcessInstanceInfo &process_info) { return false; } -#if !NO_XPC_SERVICES +#if TARGET_OS_OSX static void PackageXPCArguments(xpc_object_t message, const char *prefix, const Args &args) { size_t count = args.GetArgumentCount(); @@ -841,7 +837,7 @@ static short GetPosixspawnFlags(const ProcessLaunchInfo &launch_info) { static Status LaunchProcessXPC(const char *exe_path, ProcessLaunchInfo &launch_info, lldb::pid_t &pid) { -#if !NO_XPC_SERVICES +#if TARGET_OS_OSX Status error = getXPCAuthorization(launch_info); if (error.Fail()) return error; @@ -1194,7 +1190,7 @@ static Status LaunchProcessPosixSpawn(const char *exe_path, static bool ShouldLaunchUsingXPC(ProcessLaunchInfo &launch_info) { bool result = false; -#if !NO_XPC_SERVICES +#if TARGET_OS_OSX bool launchingAsRoot = launch_info.GetUserID() 
== 0; bool currentUserIsRoot = HostInfo::GetEffectiveUserID() == 0; @@ -1226,7 +1222,7 @@ static bool ShouldLaunchUsingXPC(ProcessLaunchInfo &launch_info) { } if (launch_info.GetFlags().Test(eLaunchFlagLaunchInTTY)) { -#if !defined(__arm__) && !defined(__arm64__) && !defined(__aarch64__) +#if TARGET_OS_OSX return LaunchInNewTerminalWithAppleScript(exe_spec.GetPath().c_str(), launch_info); #else diff --git a/lldb/source/Interpreter/OptionValuePathMappings.cpp b/lldb/source/Interpreter/OptionValuePathMappings.cpp index ebff5c4dca3e..2784279579f0 100644 --- a/lldb/source/Interpreter/OptionValuePathMappings.cpp +++ b/lldb/source/Interpreter/OptionValuePathMappings.cpp @@ -61,7 +61,7 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, count); } else { bool changed = false; - for (size_t i = 1; i < argc; i += 2, ++idx) { + for (size_t i = 1; i < argc; i += 2) { const char *orginal_path = args.GetArgumentAtIndex(i); const char *replace_path = args.GetArgumentAtIndex(i + 1); if (VerifyPathExists(replace_path)) { @@ -70,9 +70,13 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, if (!m_path_mappings.Replace(a, b, idx, m_notify_changes)) m_path_mappings.Append(a, b, m_notify_changes); changed = true; + idx++; } else { + std::string previousError = + error.Fail() ? std::string(error.AsCString()) + "\n" : ""; error.SetErrorStringWithFormat( - "the replacement path doesn't exist: \"%s\"", replace_path); + "%sthe replacement path doesn't exist: \"%s\"", + previousError.c_str(), replace_path); break; } } @@ -109,9 +113,11 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, m_value_was_set = true; changed = true; } else { + std::string previousError = + error.Fail() ? std::string(error.AsCString()) + "\n" : ""; error.SetErrorStringWithFormat( - "the replacement path doesn't exist: \"%s\"", replace_path); - break; + "%sthe replacement path doesn't exist: \"%s\"", + previousError.c_str(), replace_path); } } if (changed) @@ -135,7 +141,7 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, bool changed = false; if (op == eVarSetOperationInsertAfter) ++idx; - for (size_t i = 1; i < argc; i += 2, ++idx) { + for (size_t i = 1; i < argc; i += 2) { const char *orginal_path = args.GetArgumentAtIndex(i); const char *replace_path = args.GetArgumentAtIndex(i + 1); if (VerifyPathExists(replace_path)) { @@ -143,9 +149,13 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, ConstString b(replace_path); m_path_mappings.Insert(a, b, idx, m_notify_changes); changed = true; + idx++; } else { + std::string previousError = + error.Fail() ? 
std::string(error.AsCString()) + "\n" : ""; error.SetErrorStringWithFormat( - "the replacement path doesn't exist: \"%s\"", replace_path); + "%sthe replacement path doesn't exist: \"%s\"", + previousError.c_str(), replace_path); break; } } diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index 38b4472f50a7..385b291df709 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -782,6 +782,9 @@ std::unique_ptr ObjectFilePECOFF::CreateCallFrameInfo() { if (!data_dir_exception.vmaddr) return {}; + if (m_coff_header.machine != llvm::COFF::IMAGE_FILE_MACHINE_AMD64) + return {}; + return std::make_unique(*this, data_dir_exception.vmaddr, data_dir_exception.vmsize); } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index a609b06d4e13..f355681f2679 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -56,12 +56,12 @@ class Declaration; /// The implementation of lldb::Type's m_payload field for TypeSystemClang. class TypePayloadClang { /// Layout: bit 31 ... IsCompleteObjCClass. - uint32_t m_payload = 0; + Type::Payload m_payload = 0; public: TypePayloadClang() = default; explicit TypePayloadClang(bool is_complete_objc_class); explicit TypePayloadClang(uint32_t opaque_payload) : m_payload(opaque_payload) {} - operator uint32_t() { return m_payload; } + operator Type::Payload() { return m_payload; } static constexpr unsigned ObjCClassBit = 1 << 31; bool IsCompleteObjCClass() { return Flags(m_payload).Test(ObjCClassBit); } diff --git a/lldb/test/API/commands/expression/anonymous-struct/TestCallUserAnonTypedef.py b/lldb/test/API/commands/expression/anonymous-struct/TestCallUserAnonTypedef.py index b6e035752896..1a4e16610042 100644 --- a/lldb/test/API/commands/expression/anonymous-struct/TestCallUserAnonTypedef.py +++ b/lldb/test/API/commands/expression/anonymous-struct/TestCallUserAnonTypedef.py @@ -6,22 +6,15 @@ Ticket: https://llvm.org/bugs/show_bug.cgi?id=26790 """ - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - class TestExprLookupAnonStructTypedef(TestBase): mydir = TestBase.compute_mydir(__file__) - def setUp(self): - TestBase.setUp(self) - # Find the breakpoint - self.line = line_number('main.cpp', '// lldb testsuite break') - @expectedFailureAll( oslist=['linux'], archs=['arm'], @@ -29,16 +22,5 @@ def setUp(self): def test(self): """Test typedeffed untagged struct arguments for function call expressions""" self.build() - - self.runCmd("file "+self.getBuildArtifact("a.out"), - CURRENT_EXECUTABLE_SET) - lldbutil.run_break_set_by_file_and_line( - self, - "main.cpp", - self.line, - num_expected_locations=-1, - loc_exact=True - ) - - self.runCmd("run", RUN_SUCCEEDED) + lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.cpp")) self.expect_expr("multiply(&s)", result_type="double", result_value="1") diff --git a/lldb/test/API/commands/expression/anonymous-struct/main.cpp b/lldb/test/API/commands/expression/anonymous-struct/main.cpp index 5b170c5f943a..d6366e787152 100644 --- a/lldb/test/API/commands/expression/anonymous-struct/main.cpp +++ b/lldb/test/API/commands/expression/anonymous-struct/main.cpp @@ -1,26 +1,17 @@ -#include - typedef struct { - float f; - int i; + float f; 
+ int i; } my_untagged_struct; -double multiply(my_untagged_struct *s) -{ - return s->f * s->i; -} +double multiply(my_untagged_struct *s) { return s->f * s->i; } -double multiply(my_untagged_struct *s, int x) -{ - return multiply(s) * x; -} +double multiply(my_untagged_struct *s, int x) { return multiply(s) * x; } -int main(int argc, char **argv) -{ - my_untagged_struct s = { - .f = (float)argc, - .i = argc, - }; - // lldb testsuite break - return !(multiply(&s, argc) == pow(argc, 3)); +int main(int argc, char **argv) { + my_untagged_struct s = { + .f = (float)argc, + .i = argc, + }; + // break here + return multiply(&s, argc) > 0; } diff --git a/lldb/test/API/commands/expression/call-function/TestCallBuiltinFunction.py b/lldb/test/API/commands/expression/call-function/TestCallBuiltinFunction.py index 31478884ad7d..55ba2717c013 100644 --- a/lldb/test/API/commands/expression/call-function/TestCallBuiltinFunction.py +++ b/lldb/test/API/commands/expression/call-function/TestCallBuiltinFunction.py @@ -17,24 +17,10 @@ class ExprCommandCallBuiltinFunction(TestBase): # Builtins are expanded by Clang, so debug info shouldn't matter. NO_DEBUG_INFO_TESTCASE = True - def setUp(self): - TestBase.setUp(self) - # Find the line number to break for main.c. - self.line = line_number( - 'main.cpp', - '// Please test these expressions while stopped at this line:') - def test(self): self.build() - # Set breakpoint in main and run exe - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=-1, loc_exact=True) - - self.runCmd("run", RUN_SUCCEEDED) - - # Test different builtin functions. + target = self.dbg.CreateTarget(self.getBuildArtifact("a.out")) self.expect_expr("__builtin_isinf(0.0f)", result_type="int", result_value="0") self.expect_expr("__builtin_isnormal(0.0f)", result_type="int", result_value="0") diff --git a/lldb/test/API/commands/expression/call-function/TestCallStdStringFunction.py b/lldb/test/API/commands/expression/call-function/TestCallStdStringFunction.py index 261e702fa59a..f94bcae34cf9 100644 --- a/lldb/test/API/commands/expression/call-function/TestCallStdStringFunction.py +++ b/lldb/test/API/commands/expression/call-function/TestCallStdStringFunction.py @@ -2,26 +2,15 @@ Test calling std::String member functions. """ - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - class ExprCommandCallFunctionTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line number to break for main.c. 
- self.line = line_number( - 'main.cpp', - '// Please test these expressions while stopped at this line:') - @expectedFailureAll( compiler="icc", bugnumber="llvm.org/pr14437, fails with ICC 13.1") @@ -29,15 +18,7 @@ def setUp(self): def test_with(self): """Test calling std::String member function.""" self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), - CURRENT_EXECUTABLE_SET) - - # Some versions of GCC encode two locations for the 'return' statement - # in main.cpp - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=-1, loc_exact=True) - - self.runCmd("run", RUN_SUCCEEDED) + lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.cpp")) self.expect("print str", substrs=['Hello world']) diff --git a/lldb/test/API/commands/expression/call-function/TestCallStopAndContinue.py b/lldb/test/API/commands/expression/call-function/TestCallStopAndContinue.py index 0f0f1a54e31c..1191176aa706 100644 --- a/lldb/test/API/commands/expression/call-function/TestCallStopAndContinue.py +++ b/lldb/test/API/commands/expression/call-function/TestCallStopAndContinue.py @@ -2,13 +2,10 @@ Test calling a function, stopping in the call, continue and gather the result on stop. """ - - import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - class ExprCommandCallStopContinueTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) @@ -17,27 +14,16 @@ def setUp(self): # Call super's setUp(). TestBase.setUp(self) # Find the line number to break for main.c. - self.line = line_number( - 'main.cpp', - '// Please test these expressions while stopped at this line:') - self.func_line = line_number('main.cpp', '{5, "five"}') def test(self): """Test gathering result from interrupted function call.""" self.build() - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - - # Some versions of GCC encode two locations for the 'return' statement - # in main.cpp - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=-1, loc_exact=True) - - self.runCmd("run", RUN_SUCCEEDED) + lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.cpp")) lldbutil.run_break_set_by_file_and_line( self, "main.cpp", - self.func_line, + line_number('main.cpp', '{5, "five"}'), num_expected_locations=-1, loc_exact=True) diff --git a/lldb/test/API/commands/expression/call-function/TestCallUserDefinedFunction.py b/lldb/test/API/commands/expression/call-function/TestCallUserDefinedFunction.py index 98cd0f24f36c..edaa76174b47 100644 --- a/lldb/test/API/commands/expression/call-function/TestCallUserDefinedFunction.py +++ b/lldb/test/API/commands/expression/call-function/TestCallUserDefinedFunction.py @@ -7,36 +7,19 @@ """ - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - class ExprCommandCallUserDefinedFunction(TestBase): mydir = TestBase.compute_mydir(__file__) - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - # Find the line number to break for main.c. 
- self.line = line_number( - 'main.cpp', - '// Please test these expressions while stopped at this line:') - def test(self): """Test return values of user defined function calls.""" self.build() - - # Set breakpoint in main and run exe - self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) - lldbutil.run_break_set_by_file_and_line( - self, "main.cpp", self.line, num_expected_locations=-1, loc_exact=True) - - self.runCmd("run", RUN_SUCCEEDED) + lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.cpp")) # Test recursive function call. self.expect_expr("fib(5)", result_type="unsigned int", result_value="5") diff --git a/lldb/test/API/commands/expression/call-function/main.cpp b/lldb/test/API/commands/expression/call-function/main.cpp index cc5f52dbf567..a383ce5c22a0 100644 --- a/lldb/test/API/commands/expression/call-function/main.cpp +++ b/lldb/test/API/commands/expression/call-function/main.cpp @@ -1,53 +1,34 @@ -#include <iostream> -#include <string> #include <cstring> +#include <string> -struct Five -{ - int number; - const char *name; +struct Five { + int number; + const char *name; }; -Five -returnsFive() -{ - Five my_five = {5, "five"}; - return my_five; +Five returnsFive() { + Five my_five = {5, "five"}; + return my_five; } -unsigned int -fib(unsigned int n) -{ - if (n < 2) - return n; - else - return fib(n - 1) + fib(n - 2); +unsigned int fib(unsigned int n) { + if (n < 2) + return n; + else + return fib(n - 1) + fib(n - 2); } -int -add(int a, int b) -{ - return a + b; -} +int add(int a, int b) { return a + b; } -bool -stringCompare(const char *str) -{ - if (strcmp( str, "Hello world" ) == 0) - return true; - else - return false; +bool stringCompare(const char *str) { + if (strcmp(str, "Hello world") == 0) + return true; + else + return false; } -int main (int argc, char const *argv[]) -{ - std::string str = "Hello world"; - std::cout << str << std::endl; - std::cout << str.c_str() << std::endl; - Five main_five = returnsFive(); -#if 0 - print str - print str.c_str() -#endif - return 0; // Please test these expressions while stopped at this line: +int main(int argc, char const *argv[]) { + std::string str = "Hello world"; + Five main_five = returnsFive(); + return strlen(str.c_str()); // break here } diff --git a/lldb/test/API/commands/expression/char/TestExprsChar.py b/lldb/test/API/commands/expression/char/TestExprsChar.py index f1fa78053846..a1a4568aa92e 100644 --- a/lldb/test/API/commands/expression/char/TestExprsChar.py +++ b/lldb/test/API/commands/expression/char/TestExprsChar.py @@ -1,44 +1,21 @@ - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - class ExprCharTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) - def setUp(self): - # Call super's setUp().
- TestBase.setUp(self) - - self.main_source = "main.cpp" - self.main_source_spec = lldb.SBFileSpec(self.main_source) - def do_test(self, dictionary=None): """These basic expression commands should work as expected.""" self.build(dictionary=dictionary) - (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(self, - '// Break here', self.main_source_spec) - frame = thread.GetFrameAtIndex(0) - - value = frame.EvaluateExpression("foo(c)") - self.assertTrue(value.IsValid()) - self.assertTrue(value.GetError().Success()) - self.assertEqual(value.GetValueAsSigned(0), 1) - - value = frame.EvaluateExpression("foo(sc)") - self.assertTrue(value.IsValid()) - self.assertTrue(value.GetError().Success()) - self.assertEqual(value.GetValueAsSigned(0), 2) + lldbutil.run_to_source_breakpoint(self, '// Break here', lldb.SBFileSpec("main.cpp")) - value = frame.EvaluateExpression("foo(uc)") - self.assertTrue(value.IsValid()) - self.assertTrue(value.GetError().Success()) - self.assertEqual(value.GetValueAsSigned(0), 3) + self.expect_expr("foo(c)", result_value="1") + self.expect_expr("foo(sc)", result_value="2") + self.expect_expr("foo(uc)", result_value="3") def test_default_char(self): self.do_test() diff --git a/lldb/test/API/commands/expression/xvalue/TestXValuePrinting.py b/lldb/test/API/commands/expression/xvalue/TestXValuePrinting.py index 3a394d781f0a..f5122b84839a 100644 --- a/lldb/test/API/commands/expression/xvalue/TestXValuePrinting.py +++ b/lldb/test/API/commands/expression/xvalue/TestXValuePrinting.py @@ -1,36 +1,15 @@ - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - class ExprXValuePrintingTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) - def setUp(self): - # Call super's setUp(). 
- TestBase.setUp(self) - - self.main_source = "main.cpp" - self.main_source_spec = lldb.SBFileSpec(self.main_source) - - def do_test(self, dictionary=None): - """Printing an xvalue should work.""" - self.build(dictionary=dictionary) - - (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(self, - '// Break here', self.main_source_spec) - frame = thread.GetFrameAtIndex(0) - - value = frame.EvaluateExpression("foo().data") - self.assertTrue(value.IsValid()) - self.assertTrue(value.GetError().Success()) - self.assertEqual(value.GetValueAsSigned(), 1234) - @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr21765") def test(self): - self.do_test() - + """Printing an xvalue should work.""" + self.build() + lldbutil.run_to_source_breakpoint(self, '// Break here', lldb.SBFileSpec("main.cpp")) + self.expect_expr("foo().data", result_value="1234") diff --git a/lldb/test/API/commands/watchpoints/watchpoint_count/Makefile b/lldb/test/API/commands/watchpoints/watchpoint_count/Makefile new file mode 100644 index 000000000000..10495940055b --- /dev/null +++ b/lldb/test/API/commands/watchpoints/watchpoint_count/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/commands/watchpoints/watchpoint_count/TestWatchpointCount.py b/lldb/test/API/commands/watchpoints/watchpoint_count/TestWatchpointCount.py new file mode 100644 index 000000000000..9ad21522b4aa --- /dev/null +++ b/lldb/test/API/commands/watchpoints/watchpoint_count/TestWatchpointCount.py @@ -0,0 +1,44 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestWatchpointCount(TestBase): + mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + + @skipIf(oslist=["linux"], archs=["aarch64"]) + def test_watchpoint_count(self): + self.build() + (_, process, thread, _) = lldbutil.run_to_source_breakpoint(self, "patatino", lldb.SBFileSpec("main.c")) + frame = thread.GetFrameAtIndex(0) + first_var = frame.FindVariable("x1") + second_var = frame.FindVariable("x2") + + error = lldb.SBError() + first_watch = first_var.Watch(True, False, True, error) + if not error.Success(): + self.fail( + "Failed to make watchpoint for x1: %s" % + (error.GetCString())) + + second_watch = second_var.Watch(True, False, True, error) + if not error.Success(): + self.fail( + "Failed to make watchpoint for x2: %s" % + (error.GetCString())) + process.Continue() + + stop_reason = thread.GetStopReason() + self.assertEqual(stop_reason, lldb.eStopReasonWatchpoint, "watchpoint for x1 not hit") + stop_reason_descr = thread.GetStopDescription(256) + self.assertEqual(stop_reason_descr, "watchpoint 1") + + process.Continue() + stop_reason = thread.GetStopReason() + self.assertEqual(stop_reason, lldb.eStopReasonWatchpoint, "watchpoint for x2 not hit") + stop_reason_descr = thread.GetStopDescription(256) + self.assertEqual(stop_reason_descr, "watchpoint 2") diff --git a/lldb/test/API/commands/watchpoints/watchpoint_count/main.c b/lldb/test/API/commands/watchpoints/watchpoint_count/main.c new file mode 100644 index 000000000000..fc9a370e41f3 --- /dev/null +++ b/lldb/test/API/commands/watchpoints/watchpoint_count/main.c @@ -0,0 +1,13 @@ +#include <stdint.h> +#include <stdio.h> + +int main() { + uint8_t x1 = 0; + uint16_t x2 = 0; + + printf("patatino\n"); + + x1 += 1; + x2 += 2; + return 0; +} diff --git a/lldb/test/API/functionalities/source-map/TestTargetSourceMap.py
b/lldb/test/API/functionalities/source-map/TestTargetSourceMap.py index ac03d80023e4..c9800e6f199e 100644 --- a/lldb/test/API/functionalities/source-map/TestTargetSourceMap.py +++ b/lldb/test/API/functionalities/source-map/TestTargetSourceMap.py @@ -1,6 +1,7 @@ import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * +import os class TestTargetSourceMap(TestBase): @@ -10,6 +11,21 @@ class TestTargetSourceMap(TestBase): @no_debug_info_test def test_source_map(self): """Test target.source-map' functionality.""" + + def assertBreakpointWithSourceMap(src_path): + # Set a breakpoint after we remap source and verify that it succeeds + bp = target.BreakpointCreateByLocation(src_path, 2) + self.assertEquals(bp.GetNumLocations(), 1, + "make sure breakpoint was resolved with map") + + # Now make sure that we can actually FIND the source file using this + # remapping: + retval = lldb.SBCommandReturnObject() + self.dbg.GetCommandInterpreter().HandleCommand("source list -f main.c -l 2", retval) + self.assertTrue(retval.Succeeded(), "source list didn't succeed.") + self.assertNotEqual(retval.GetOutput(), None, "We got no ouput from source list") + self.assertTrue("return" in retval.GetOutput(), "We didn't find the source file...") + # Set the target soure map to map "./" to the current test directory src_dir = self.getSourceDir() src_path = os.path.join(src_dir, "main.c") @@ -25,19 +41,68 @@ def test_source_map(self): bp = target.BreakpointCreateByLocation(src_path, 2) self.assertEquals(bp.GetNumLocations(), 0, "make sure no breakpoints were resolved without map") - src_map_cmd = 'settings set target.source-map . "%s"' % (src_dir) - self.dbg.HandleCommand(src_map_cmd) - # Set a breakpoint after we remap source and verify that it succeeds - bp = target.BreakpointCreateByLocation(src_path, 2) - self.assertEquals(bp.GetNumLocations(), 1, - "make sure breakpoint was resolved with map") - - # Now make sure that we can actually FIND the source file using this - # remapping: - retval = lldb.SBCommandReturnObject() - self.dbg.GetCommandInterpreter().HandleCommand("source list -f main.c -l 2", retval) - self.assertTrue(retval.Succeeded(), "source list didn't succeed.") - self.assertNotEqual(retval.GetOutput(), None, "We got no ouput from source list") - self.assertTrue("return" in retval.GetOutput(), "We didn't find the source file...") + invalid_path = src_dir + "invalid_path" + invalid_path2 = src_dir + "invalid_path2" + + # We make sure the error message contains all the invalid paths + self.expect( + 'settings set target.source-map . "%s" . "%s" . "%s"' % (invalid_path, src_dir, invalid_path2), + substrs=[ + 'the replacement path doesn\'t exist: "%s"' % (invalid_path), + 'the replacement path doesn\'t exist: "%s"' % (invalid_path2), + ], + error=True, + ) + self.expect( + 'settings show target.source-map', + substrs=['[0] "." -> "%s"' % (src_dir)], + ) + assertBreakpointWithSourceMap(src_path) + + # Index 0 is the valid mapping, and modifying it to an invalid one should have no effect + self.expect( + 'settings replace target.source-map 0 . "%s"' % (invalid_path), + substrs=['error: the replacement path doesn\'t exist: "%s"' % (invalid_path)], + error=True, + ) + self.expect( + 'settings show target.source-map', + substrs=['[0] "." 
-> "%s"' % (src_dir)] + ) + assertBreakpointWithSourceMap(src_path) + + # Let's clear and add the mapping in with insert-after + self.runCmd('settings remove target.source-map 0') + self.expect( + 'settings show target.source-map', + endstr="target.source-map (path-map) =\n", + ) + # We add a valid but useless mapping so that we can use insert-after + another_valid_path = os.path.dirname(src_dir) + self.runCmd('settings set target.source-map . "%s"' % (another_valid_path)) + + self.expect( + 'settings replace target.source-map 0 . "%s"' % (invalid_path), + substrs=['error: the replacement path doesn\'t exist: "%s"' % (invalid_path)], + error=True, + ) + self.expect( + 'settings show target.source-map', + substrs=['[0] "." -> "%s"' % (another_valid_path)] + ) + + # Let's clear and add the mapping in with append + self.expect('settings remove target.source-map 0') + self.expect( + 'settings show target.source-map', + endstr="target.source-map (path-map) =\n", + ) + + self.expect( + 'settings append target.source-map . "%s" . "%s"' % (invalid_path, src_dir), + substrs=['error: the replacement path doesn\'t exist: "%s"' % (invalid_path)], + error=True, + ) + assertBreakpointWithSourceMap(src_path) diff --git a/lldb/test/API/python_api/sbplatform/Makefile b/lldb/test/API/python_api/sbplatform/Makefile new file mode 100644 index 000000000000..99998b20bcb0 --- /dev/null +++ b/lldb/test/API/python_api/sbplatform/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/python_api/sbplatform/TestSBPlatform.py b/lldb/test/API/python_api/sbplatform/TestSBPlatform.py new file mode 100644 index 000000000000..4735f6ea3b49 --- /dev/null +++ b/lldb/test/API/python_api/sbplatform/TestSBPlatform.py @@ -0,0 +1,22 @@ +"""Test the SBPlatform APIs.""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * + +class SBPlatformAPICase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + NO_DEBUG_INFO_TESTCASE = True + + @add_test_categories(['pyapi']) + def test_run(self): + self.build() + plat = lldb.SBPlatform.GetHostPlatform() + + os.environ["MY_TEST_ENV_VAR"]="SBPlatformAPICase.test_run" + def cleanup(): + del os.environ["MY_TEST_ENV_VAR"] + self.addTearDownHook(cleanup) + cmd = lldb.SBPlatformShellCommand(self.getBuildArtifact("a.out")) + self.assertTrue(plat.Run(cmd).Success()) + self.assertIn("MY_TEST_ENV_VAR=SBPlatformAPICase.test_run", cmd.GetOutput()) diff --git a/lldb/test/API/python_api/sbplatform/main.cpp b/lldb/test/API/python_api/sbplatform/main.cpp new file mode 100644 index 000000000000..9f2aca26ab8d --- /dev/null +++ b/lldb/test/API/python_api/sbplatform/main.cpp @@ -0,0 +1,8 @@ +#include +#include + +int main() { + printf("MY_TEST_ENV_VAR=%s\n", getenv("MY_TEST_ENV_VAR")); + + return 0; +} diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp index e5d4b05d987c..3e7bda88e6af 100644 --- a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp +++ b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp @@ -1067,31 +1067,34 @@ uint32_t DNBArchMachARM64::GetHardwareWatchpointHit(nub_addr_t &addr) { "DNBArchMachARM64::GetHardwareWatchpointHit() addr = 0x%llx", (uint64_t)addr); - // This is the watchpoint value to match against, i.e., word address. 
- nub_addr_t wp_val = addr & ~((nub_addr_t)3); if (kret == KERN_SUCCESS) { DBG &debug_state = m_state.dbg; uint32_t i, num = NumSupportedHardwareWatchpoints(); for (i = 0; i < num; ++i) { nub_addr_t wp_addr = GetWatchAddress(debug_state, i); - DNBLogThreadedIf(LOG_WATCHPOINTS, "DNBArchMachARM64::" - "GetHardwareWatchpointHit() slot: %u " - "(addr = 0x%llx).", - i, (uint64_t)wp_addr); - if (wp_val == wp_addr) { - uint32_t byte_mask = bits(debug_state.__wcr[i], 12, 5); - - // Sanity check the byte_mask, first. - if (LowestBitSet(byte_mask) < 0) - continue; - - // Check that the watchpoint is enabled. - if (!IsWatchpointEnabled(debug_state, i)) - continue; - - // Compute the starting address (from the point of view of the - // debugger). - addr = wp_addr + LowestBitSet(byte_mask); + uint32_t byte_mask = bits(debug_state.__wcr[i], 12, 5); + + DNBLogThreadedIf(LOG_WATCHPOINTS, "DNBArchImplX86_64::" + "GetHardwareWatchpointHit() slot: %u " + "(addr = 0x%llx; byte_mask = 0x%x)", + i, static_cast<uint64_t>(wp_addr), + byte_mask); + + if (!IsWatchpointEnabled(debug_state, i)) + continue; + + if (bits(wp_addr, 48, 3) != bits(addr, 48, 3)) + continue; + + // Sanity check the byte_mask + uint32_t lsb = LowestBitSet(byte_mask); + if (lsb < 0) + continue; + + uint64_t byte_to_match = bits(addr, 2, 0); + + if (byte_mask & (1 << byte_to_match)) { + addr = wp_addr + lsb; return i; } } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 058e76fc2b0c..f6c0a66f4407 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1107,14 +1107,16 @@ Currently, only the following parameter attributes are defined: .. _noalias: ``noalias`` - This indicates that objects accessed via pointer values + This indicates that memory locations accessed via pointer values :ref:`based ` on the argument or return value are not also accessed, during the execution of the function, via pointer values not - *based* on the argument or return value. The attribute on a return value - also has additional semantics described below. The caller shares the - responsibility with the callee for ensuring that these requirements are met. - For further details, please see the discussion of the NoAlias response in - :ref:`alias analysis `. + *based* on the argument or return value. This guarantee only holds for + memory locations that are *modified*, by any means, during the execution of + the function. The attribute on a return value also has additional semantics + described below. The caller shares the responsibility with the callee for + ensuring that these requirements are met. For further details, please see + the discussion of the NoAlias response in :ref:`alias analysis `. Note that this definition of ``noalias`` is intentionally similar to the definition of ``restrict`` in C99 for function arguments. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index ce04592bf53e..5f5ef62f0139 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -233,31 +233,6 @@ class TargetTransformInfo { /// the EXT operation. int getExtCost(const Instruction *I, const Value *Src) const; - /// Estimate the cost of a function call when lowered. - /// - /// The contract for this is the same as \c getOperationCost except that it - /// supports an interface that provides extra information specific to call - /// instructions.
- /// - /// This is the most basic query for estimating call cost: it only knows the - /// function type and (potentially) the number of arguments at the call site. - /// The latter is only interesting for varargs function types. - int getCallCost(FunctionType *FTy, int NumArgs = -1, - const User *U = nullptr) const; - - /// Estimate the cost of calling a specific function when lowered. - /// - /// This overload adds the ability to reason about the particular function - /// being called in the event it is a library call with special lowering. - int getCallCost(const Function *F, int NumArgs = -1, - const User *U = nullptr) const; - - /// Estimate the cost of calling a specific function when lowered. - /// - /// This overload allows specifying a set of candidate argument values. - int getCallCost(const Function *F, ArrayRef<const Value *> Arguments, - const User *U = nullptr) const; - /// \returns A value by which our inlining threshold should be multiplied. /// This is primarily used to bump up the inlining threshold wholesale on /// targets where calls are unusually expensive. @@ -279,15 +254,11 @@ class TargetTransformInfo { int getInlinerVectorBonusPercent() const; /// Estimate the cost of an intrinsic when lowered. - /// - /// Mirrors the \c getCallCost method but uses an intrinsic identifier. int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys, const User *U = nullptr) const; /// Estimate the cost of an intrinsic when lowered. - /// - /// Mirrors the \c getCallCost method but uses an intrinsic identifier. int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments, const User *U = nullptr) const; @@ -1206,10 +1177,6 @@ class TargetTransformInfo::Concept { virtual int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands) = 0; virtual int getExtCost(const Instruction *I, const Value *Src) = 0; - virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0; - virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0; - virtual int getCallCost(const Function *F, - ArrayRef<const Value *> Arguments, const User *U) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; virtual int getInlinerVectorBonusPercent() = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, @@ -1455,16 +1422,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { int getExtCost(const Instruction *I, const Value *Src) override { return Impl.getExtCost(I, Src); } - int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override { - return Impl.getCallCost(FTy, NumArgs, U); - } - int getCallCost(const Function *F, int NumArgs, const User *U) override { - return Impl.getCallCost(F, NumArgs, U); - } - int getCallCost(const Function *F, - ArrayRef<const Value *> Arguments, const User *U) override { - return Impl.getCallCost(F, Arguments, U); - } unsigned getInliningThresholdMultiplier() override { return Impl.getInliningThresholdMultiplier(); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 765d35a05a46..8749fa49010b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -132,21 +132,6 @@ class TargetTransformInfoImplBase { return TTI::TCC_Basic; } - unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) { - assert(FTy && "FunctionType must be provided to this routine."); - - // The target-independent implementation just measures the size of the - // function by approximating
that each argument will take on average one - instruction to prepare. - - if (NumArgs < 0) - // Set the argument number to the number of explicit arguments in the - // function. - NumArgs = FTy->getNumParams(); - - return TTI::TCC_Basic * (NumArgs + 1); - } - unsigned getInliningThresholdMultiplier() { return 1; } int getInlinerVectorBonusPercent() { return 150; } @@ -726,37 +711,6 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} public: - using BaseT::getCallCost; - - unsigned getCallCost(const Function *F, int NumArgs, const User *U) { - assert(F && "A concrete function must be provided to this routine."); - - if (NumArgs < 0) - // Set the argument number to the number of explicit arguments in the - // function. - NumArgs = F->arg_size(); - - if (Intrinsic::ID IID = F->getIntrinsicID()) { - FunctionType *FTy = F->getFunctionType(); - SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); - return static_cast<T *>(this) - ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U); - } - - if (!static_cast<T *>(this)->isLoweredToCall(F)) - return TTI::TCC_Basic; // Give a basic cost if it will be lowered - // directly. - - return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U); - } - - unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments, - const User *U) { - // Simply delegate to generic handling of the call. - // FIXME: We should use instsimplify or something else to catch calls which - // will constant fold with these arguments. - return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U); - } using BaseT::getGEPCost; @@ -898,15 +852,19 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { if (auto CS = ImmutableCallSite(U)) { const Function *F = CS.getCalledFunction(); - if (!F) { - // Just use the called value type. - Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); - return TargetTTI->getCallCost(cast<FunctionType>(FTy), - CS.arg_size(), U); - } + if (F) { + FunctionType *FTy = F->getFunctionType(); + if (Intrinsic::ID IID = F->getIntrinsicID()) { + SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); + return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U); + } - SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); - return TargetTTI->getCallCost(F, Arguments, U); + if (!TargetTTI->isLoweredToCall(F)) + return TTI::TCC_Basic; // Give a basic cost if it will be lowered + + return TTI::TCC_Basic * (FTy->getNumParams() + 1); + } + return TTI::TCC_Basic * (CS.arg_size() + 1); } if (isa(U) || isa(U) || isa(U)) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 6797ed2369d8..36aea31365c2 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -339,22 +339,8 @@ bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0); /// /// This is the reverse process of "canWidenShuffleElements", but can always /// succeed. -template <typename T> -void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask, - SmallVectorImpl<T> &ScaledMask) { - assert(Scale > 0 && "Unexpected scaling factor"); - - // Fast-path: if no scaling, then it is just a copy. - if (Scale == 1) { - ScaledMask.assign(Mask.begin(), Mask.end()); - return; - } - - ScaledMask.clear(); - for (int MaskElt : Mask) - for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt) - ScaledMask.push_back(MaskElt < 0 ?
MaskElt : Scale * MaskElt + ScaleElt); -} +void scaleShuffleMask(size_t Scale, ArrayRef<int> Mask, + SmallVectorImpl<int> &ScaledMask); /// Compute a map of integer instructions to their minimum legal type /// size. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 723da1a4fd28..a4aa4a7cbd69 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -582,8 +582,9 @@ class IRTranslator : public MachineFunctionPass { /// Get the alignment of the given memory operation instruction. This will /// either be the explicitly specified value or the ABI-required alignment for /// the type being accessed (according to the Module's DataLayout). - /// FIXME: Remove once transition to Align is over. - inline unsigned getMemOpAlignment(const Instruction &I) { + LLVM_ATTRIBUTE_DEPRECATED( + inline unsigned getMemOpAlignment(const Instruction &I), + "Use getMemOpAlign instead") { return getMemOpAlign(I).value(); } diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index 069d0aa45095..c68b073ebb8c 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -142,6 +142,23 @@ template <> struct ScalarEnumerationTraits { } }; +template <> struct ScalarTraits<MaybeAlign> { + static void output(const MaybeAlign &Alignment, void *, + llvm::raw_ostream &out) { + out << uint64_t(Alignment ? Alignment->value() : 0U); + } + static StringRef input(StringRef Scalar, void *, MaybeAlign &Alignment) { + unsigned long long n; + if (getAsUnsignedInteger(Scalar, 10, n)) + return "invalid number"; + if (n > 0 && !isPowerOf2_64(n)) + return "must be 0 or a power of two"; + Alignment = MaybeAlign(n); + return StringRef(); + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + } // end namespace yaml } // end namespace llvm @@ -212,7 +229,7 @@ struct MachineStackObject { ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; - unsigned Alignment = 0; + MaybeAlign Alignment = None; TargetStackID::Value StackID; StringValue CalleeSavedRegister; bool CalleeSavedRestored = true; @@ -252,7 +269,7 @@ template <> struct MappingTraits<MachineStackObject> { YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); + YamlIO.mapOptional("alignment", Object.Alignment, None); YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. @@ -278,7 +295,7 @@ struct FixedMachineStackObject { ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; - unsigned Alignment = 0; + MaybeAlign Alignment = None; TargetStackID::Value StackID; bool IsImmutable = false; bool IsAliased = false; @@ -327,7 +344,7 @@ template <> struct MappingTraits<FixedMachineStackObject> { FixedMachineStackObject::DefaultType); // Don't print the default type.
YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); YamlIO.mapOptional("size", Object.Size, (uint64_t)0); - YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); + YamlIO.mapOptional("alignment", Object.Alignment, None); YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default); if (Object.Type != FixedMachineStackObject::SpillSlot) { YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); @@ -411,7 +428,7 @@ template <> struct MappingTraits { struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; - unsigned Alignment = 0; + MaybeAlign Alignment = None; bool IsTargetSpecific = false; bool operator==(const MachineConstantPoolValue &Other) const { @@ -425,7 +442,7 @@ template <> struct MappingTraits<MachineConstantPoolValue> { static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { YamlIO.mapRequired("id", Constant.ID); YamlIO.mapOptional("value", Constant.Value, StringValue()); - YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0); + YamlIO.mapOptional("alignment", Constant.Alignment, None); YamlIO.mapOptional("isTargetSpecific", Constant.IsTargetSpecific, false); } }; @@ -571,7 +588,7 @@ template <> struct MappingTraits> { struct MachineFunction { StringRef Name; - unsigned Alignment = 0; + MaybeAlign Alignment = None; bool ExposesReturnsTwice = false; // GISel MachineFunctionProperties. bool Legalized = false; @@ -599,7 +616,7 @@ struct MachineFunction { template <> struct MappingTraits<MachineFunction> { static void mapping(IO &YamlIO, MachineFunction &MF) { YamlIO.mapRequired("name", MF.Name); - YamlIO.mapOptional("alignment", MF.Alignment, (unsigned)0); + YamlIO.mapOptional("alignment", MF.Alignment, None); YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false); YamlIO.mapOptional("legalized", MF.Legalized, false); YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 03811bc5145c..f0fb7655881b 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -462,10 +462,10 @@ class MachineFrameInfo { /// Return the alignment of the specified stack object. /// FIXME: Remove this function once transition to Align is over. - unsigned getObjectAlignment(int ObjectIdx) const { - assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && - "Invalid Object Idx!"); - return Objects[ObjectIdx + NumFixedObjects].Alignment.value(); + LLVM_ATTRIBUTE_DEPRECATED(inline unsigned getObjectAlignment(int ObjectIdx) + const, + "Use getObjectAlign instead") { + return getObjectAlign(ObjectIdx).value(); } /// Return the alignment of the specified stack object. @@ -475,18 +475,6 @@ class MachineFrameInfo { return Objects[ObjectIdx + NumFixedObjects].Alignment; } - /// setObjectAlignment - Change the alignment of the specified stack object. - /// FIXME: Remove this function once transition to Align is over. - void setObjectAlignment(int ObjectIdx, unsigned Align) { - assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && - "Invalid Object Idx!"); - Objects[ObjectIdx + NumFixedObjects].Alignment = assumeAligned(Align); - - // Only ensure max alignment for the default stack. - if (getStackID(ObjectIdx) == 0) - ensureMaxAlignment(assumeAligned(Align)); - } - /// setObjectAlignment - Change the alignment of the specified stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment) { assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() && @@ -498,6 +486,14 @@ class MachineFrameInfo { ensureMaxAlignment(Alignment); } + /// setObjectAlignment - Change the alignment of the specified stack object. + /// FIXME: Remove this function once transition to Align is over. + LLVM_ATTRIBUTE_DEPRECATED(inline void setObjectAlignment(int ObjectIdx, + unsigned Align), + "Use the version that takes Align instead") { + setObjectAlignment(ObjectIdx, assumeAligned(Align)); + } + /// Return the underlying Alloca of the specified /// stack object if it exists. Returns 0 if none exists. const AllocaInst* getObjectAllocation(int ObjectIdx) const { diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 560d098e0a12..62ad8242ef71 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1114,14 +1114,36 @@ class SelectionDAG { /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not /// less than FIRST_TARGET_MEMORY_OPCODE. SDValue getMemIntrinsicNode( - unsigned Opcode, const SDLoc &dl, SDVTList VTList, - ArrayRef<SDValue> Ops, EVT MemVT, - MachinePointerInfo PtrInfo, - unsigned Align = 0, - MachineMemOperand::Flags Flags - = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, - uint64_t Size = 0, - const AAMDNodes &AAInfo = AAMDNodes()); + unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, + EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore, + uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()); + + inline SDValue getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, + EVT MemVT, MachinePointerInfo PtrInfo, MaybeAlign Alignment = None, + MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore, + uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()) { + // Ensure that codegen never sees alignment 0 + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo, + Alignment.getValueOr(getEVTAlign(MemVT)), Flags, + Size, AAInfo); + } + + LLVM_ATTRIBUTE_DEPRECATED( + inline SDValue getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, + unsigned Alignment, + MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore, + uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()), + "") { + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo, + MaybeAlign(Alignment), Flags, Size, AAInfo); + } SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, @@ -1793,9 +1815,17 @@ class SelectionDAG { bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const; - /// Infer alignment of a load / store address. Return 0 if - /// it cannot be inferred. - unsigned InferPtrAlignment(SDValue Ptr) const; + /// Infer alignment of a load / store address. Return None if it cannot be + /// inferred.
+ MaybeAlign InferPtrAlign(SDValue Ptr) const; + + LLVM_ATTRIBUTE_DEPRECATED(inline unsigned InferPtrAlignment(SDValue Ptr) + const, + "Use InferPtrAlign instead") { + if (auto A = InferPtrAlign(Ptr)) + return A->value(); + return 0; + } /// Compute the VTs needed for the low/hi parts of a type /// which is split (or expanded) into two not necessarily identical pieces. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index fefa8daa60a1..99601c436651 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1962,6 +1962,18 @@ class TargetLoweringBase { return ISD::ZERO_EXTEND; } + /// Returns how the platform's atomic compare and swap expects its comparison + /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is + /// separate from getExtendForAtomicOps, which is concerned with the + /// sign-extension of the instruction's output, whereas here we are concerned + /// with the sign-extension of the input. For targets with compare-and-swap + /// instructions (or sub-word comparisons in their LL/SC loop expansions), + /// the input can be ANY_EXTEND, but the output will still have a specific + /// extension. + virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const { + return ISD::ANY_EXTEND; + } + /// @} /// Returns true if we should normalize diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 441713c2595d..be5152d09ad2 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -1309,11 +1309,14 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU) { if (MU->getSymbols().empty()) { // Empty MUs are allowable but pathological, so issue a warning. DEBUG_WITH_TYPE("orc", { - dbgs() << "Warning: Discarding empty MU " << MU->getName() << "\n"; + dbgs() << "Warning: Discarding empty MU " << MU->getName() << " for " + << getName() << "\n"; }); return Error::success(); } else - DEBUG_WITH_TYPE("orc", dbgs() << "Defining MU " << MU->getName() << ":\n"); + DEBUG_WITH_TYPE("orc", { + dbgs() << "Defining MU " << MU->getName() << " for " << getName() << "\n"; + }); return ES.runSessionLocked([&, this]() -> Error { if (auto Err = defineImpl(*MU)) @@ -1340,11 +1343,14 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) { if (MU->getSymbols().empty()) { // Empty MUs are allowable but pathological, so issue a warning. DEBUG_WITH_TYPE("orc", { - dbgs() << "Warning: Discarding empty MU " << MU->getName() << "\n"; + dbgs() << "Warning: Discarding empty MU " << MU->getName() << getName() + << "\n"; }); return Error::success(); } else - DEBUG_WITH_TYPE("orc", dbgs() << "Defining MU " << MU->getName() << ":\n"); + DEBUG_WITH_TYPE("orc", { + dbgs() << "Defining MU " << MU->getName() << " for " << getName() << "\n"; + }); return ES.runSessionLocked([&, this]() -> Error { if (auto Err = defineImpl(*MU)) diff --git a/llvm/include/llvm/MC/LaneBitmask.h b/llvm/include/llvm/MC/LaneBitmask.h index b070bea3201c..a467407f1706 100644 --- a/llvm/include/llvm/MC/LaneBitmask.h +++ b/llvm/include/llvm/MC/LaneBitmask.h @@ -40,7 +40,7 @@ namespace llvm { // When changing the underlying type, change the format string as well.
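// Note on the format-string change below: "%016llX" expects 'unsigned long
// long', which matches uint64_t everywhere, while the old "%016lX" assumed a
// 64-bit 'unsigned long' - an assumption that does not hold on LLP64
// platforms such as 64-bit Windows.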
using Type = uint64_t; enum : unsigned { BitWidth = 8*sizeof(Type) }; - constexpr static const char *const FormatStr = "%016lX"; + constexpr static const char *const FormatStr = "%016llX"; constexpr LaneBitmask() = default; explicit constexpr LaneBitmask(Type V) : Mask(V) {} diff --git a/llvm/include/llvm/MC/MCDirectives.h b/llvm/include/llvm/MC/MCDirectives.h index ea79e68674e5..cad08c8574d2 100644 --- a/llvm/include/llvm/MC/MCDirectives.h +++ b/llvm/include/llvm/MC/MCDirectives.h @@ -16,34 +16,34 @@ namespace llvm { enum MCSymbolAttr { - MCSA_Invalid = 0, ///< Not a valid directive. + MCSA_Invalid = 0, ///< Not a valid directive. // Various directives in alphabetical order. - MCSA_Cold, ///< .cold (MachO) - MCSA_ELF_TypeFunction, ///< .type _foo, STT_FUNC # aka @function - MCSA_ELF_TypeIndFunction, ///< .type _foo, STT_GNU_IFUNC - MCSA_ELF_TypeObject, ///< .type _foo, STT_OBJECT # aka @object - MCSA_ELF_TypeTLS, ///< .type _foo, STT_TLS # aka @tls_object - MCSA_ELF_TypeCommon, ///< .type _foo, STT_COMMON # aka @common - MCSA_ELF_TypeNoType, ///< .type _foo, STT_NOTYPE # aka @notype + MCSA_Cold, ///< .cold (MachO) + MCSA_ELF_TypeFunction, ///< .type _foo, STT_FUNC # aka @function + MCSA_ELF_TypeIndFunction, ///< .type _foo, STT_GNU_IFUNC + MCSA_ELF_TypeObject, ///< .type _foo, STT_OBJECT # aka @object + MCSA_ELF_TypeTLS, ///< .type _foo, STT_TLS # aka @tls_object + MCSA_ELF_TypeCommon, ///< .type _foo, STT_COMMON # aka @common + MCSA_ELF_TypeNoType, ///< .type _foo, STT_NOTYPE # aka @notype MCSA_ELF_TypeGnuUniqueObject, /// .type _foo, @gnu_unique_object - MCSA_Global, ///< .globl - MCSA_LGlobal, ///< .lglobl (XCOFF) - MCSA_Hidden, ///< .hidden (ELF) - MCSA_IndirectSymbol, ///< .indirect_symbol (MachO) - MCSA_Internal, ///< .internal (ELF) - MCSA_LazyReference, ///< .lazy_reference (MachO) - MCSA_Local, ///< .local (ELF) - MCSA_NoDeadStrip, ///< .no_dead_strip (MachO) - MCSA_SymbolResolver, ///< .symbol_resolver (MachO) - MCSA_AltEntry, ///< .alt_entry (MachO) - MCSA_PrivateExtern, ///< .private_extern (MachO) - MCSA_Protected, ///< .protected (ELF) - MCSA_Reference, ///< .reference (MachO) - MCSA_Weak, ///< .weak - MCSA_WeakDefinition, ///< .weak_definition (MachO) - MCSA_WeakReference, ///< .weak_reference (MachO) - MCSA_WeakDefAutoPrivate ///< .weak_def_can_be_hidden (MachO) + MCSA_Global, ///< .globl + MCSA_LGlobal, ///< .lglobl (XCOFF) + MCSA_Hidden, ///< .hidden (ELF) + MCSA_IndirectSymbol, ///< .indirect_symbol (MachO) + MCSA_Internal, ///< .internal (ELF) + MCSA_LazyReference, ///< .lazy_reference (MachO) + MCSA_Local, ///< .local (ELF) + MCSA_NoDeadStrip, ///< .no_dead_strip (MachO) + MCSA_SymbolResolver, ///< .symbol_resolver (MachO) + MCSA_AltEntry, ///< .alt_entry (MachO) + MCSA_PrivateExtern, ///< .private_extern (MachO) + MCSA_Protected, ///< .protected (ELF) + MCSA_Reference, ///< .reference (MachO) + MCSA_Weak, ///< .weak + MCSA_WeakDefinition, ///< .weak_definition (MachO) + MCSA_WeakReference, ///< .weak_reference (MachO) + MCSA_WeakDefAutoPrivate ///< .weak_def_can_be_hidden (MachO) }; enum MCAssemblerFlag { diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index bde0835b6a55..4c8a895592ef 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -259,6 +259,8 @@ class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> { /// The instruction this is a fragment for. MCInst Inst; + /// Can we auto pad the instruction? 
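+ /// Set when automatic padding of instructions is enabled (used, e.g., by
+ /// the X86 backend when aligning branches); see the accessors below.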
+ bool AllowAutoPadding = false; public: MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI, @@ -269,6 +271,9 @@ class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> { const MCInst &getInst() const { return Inst; } void setInst(const MCInst &Value) { Inst = Value; } + bool getAllowAutoPadding() const { return AllowAutoPadding; } + void setAllowAutoPadding(bool V) { AllowAutoPadding = V; } + static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Relaxable; } diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 252f0c8e212e..1b12a9b23130 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -282,6 +282,11 @@ struct HashSection : Section { Optional<std::vector<llvm::yaml::Hex64>> Bucket; Optional<std::vector<llvm::yaml::Hex64>> Chain; + // The following members are used to override section fields. + // This is useful for creating invalid objects. + Optional<llvm::yaml::Hex64> NBucket; + Optional<llvm::yaml::Hex64> NChain; + HashSection() : Section(ChunkKind::Hash) {} static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Hash; } diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 2113197c067d..e5c50bdfbd5c 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -112,6 +112,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/KnowledgeRetention.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/Allocator.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" namespace llvm { @@ -700,11 +701,7 @@ struct Attributor { : Functions(Functions), InfoCache(InfoCache), CGUpdater(CGUpdater), DepRecomputeInterval(DepRecomputeInterval), Whitelist(Whitelist) {} - ~Attributor() { - DeleteContainerPointers(AllAbstractAttributes); - for (auto &It : ArgumentReplacementMap) - DeleteContainerPointers(It.second); - } + ~Attributor(); /// Run the analyses until a fixpoint is reached or enforced (timeout). /// @@ -1070,6 +1067,9 @@ struct Attributor { /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } + /// The allocator used to allocate memory, e.g. for `AbstractAttribute`s. + BumpPtrAllocator Allocator; + private: /// Check \p Pred on all call sites of \p Fn.
/// diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 2ae44caaaa32..a240571a39da 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -153,21 +153,6 @@ int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, return Cost; } -int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs, - const User *U) const { - int Cost = TTIImpl->getCallCost(FTy, NumArgs, U); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - -int TargetTransformInfo::getCallCost(const Function *F, - ArrayRef<const Value *> Arguments, - const User *U) const { - int Cost = TTIImpl->getCallCost(F, Arguments, U); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - unsigned TargetTransformInfo::getInliningThresholdMultiplier() const { return TTIImpl->getInliningThresholdMultiplier(); } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index c8ca2052919c..8b98e05a6884 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -397,6 +397,22 @@ bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) { return false; } +void llvm::scaleShuffleMask(size_t Scale, ArrayRef<int> Mask, + SmallVectorImpl<int> &ScaledMask) { + assert(Scale > 0 && "Unexpected scaling factor"); + + // Fast-path: if no scaling, then it is just a copy. + if (Scale == 1) { + ScaledMask.assign(Mask.begin(), Mask.end()); + return; + } + + ScaledMask.clear(); + for (int MaskElt : Mask) + for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt) + ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt); +} + MapVector<Instruction *, uint64_t> llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB, const TargetTransformInfo *TTI) { diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 213af320531c..75487075cb7a 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -34,7 +34,7 @@ GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth) Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, const MachineFunction &MF) { const MachineFrameInfo &MFI = MF.getFrameInfo(); - return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset); + return commonAlignment(MFI.getObjectAlign(FrameIdx), Offset); // TODO: How to handle cases with Base + Offset?
} diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 454a63e674d2..bb144480bbd4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -634,6 +634,7 @@ LegalizerHelper::libcall(MachineInstr &MI) { auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); MIRBuilder.setInstr(MI); + MIRBuilder.setDebugLoc(MI.getDebugLoc()); switch (MI.getOpcode()) { default: @@ -731,6 +732,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { MIRBuilder.setInstr(MI); + MIRBuilder.setDebugLoc(MI.getDebugLoc()); uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); uint64_t NarrowSize = NarrowTy.getSizeInBits(); @@ -1596,6 +1598,7 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MIRBuilder.setInstr(MI); + MIRBuilder.setDebugLoc(MI.getDebugLoc()); switch (MI.getOpcode()) { default: @@ -2188,6 +2191,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { using namespace TargetOpcode; MIRBuilder.setInstr(MI); + MIRBuilder.setDebugLoc(MI.getDebugLoc()); switch(MI.getOpcode()) { default: @@ -3223,6 +3227,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, using namespace TargetOpcode; MIRBuilder.setInstr(MI); + MIRBuilder.setDebugLoc(MI.getDebugLoc()); switch (MI.getOpcode()) { case G_IMPLICIT_DEF: return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 5022726dc70a..6c5ef0255a08 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -79,11 +79,11 @@ namespace { using StackObjSet = SmallSetVector<int, 8>; void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, - bool StackGrowsDown, unsigned &MaxAlign); + bool StackGrowsDown, Align &MaxAlign); void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, MachineFrameInfo &MFI, bool StackGrowsDown, - int64_t &Offset, unsigned &MaxAlign); + int64_t &Offset, Align &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); @@ -140,22 +140,21 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. -void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, - int FrameIdx, int64_t &Offset, - bool StackGrowsDown, - unsigned &MaxAlign) { +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, + int64_t &Offset, bool StackGrowsDown, + Align &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI.getObjectAlignment(FrameIdx); + Align Alignment = MFI.getObjectAlign(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + MaxAlign = std::max(MaxAlign, Alignment); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = alignTo(Offset, Alignment); int64_t LocalOffset = StackGrowsDown ?
-Offset : Offset; LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " @@ -173,11 +172,10 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., /// those required to be close to the Stack Protector) to stack offsets. -void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, - SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo &MFI, - bool StackGrowsDown, int64_t &Offset, - unsigned &MaxAlign) { +void LocalStackSlotPass::AssignProtectedObjSet( + const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, + Align &MaxAlign) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { int i = *I; @@ -195,7 +193,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; - unsigned MaxAlign = 0; + Align MaxAlign; // Make sure that the stack protector comes before the local variables on the // stack. @@ -262,7 +260,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Remember how big this blob of stack space is MFI.setLocalFrameSize(Offset); - MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign)); + MFI.setLocalFrameMaxAlign(MaxAlign); } static inline bool diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index cad0a8d0899a..135b2f2234af 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -401,8 +401,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, Target.reset(new PerTargetMIParsingState(MF.getSubtarget())); } - if (YamlMF.Alignment) - MF.setAlignment(Align(YamlMF.Alignment)); + MF.setAlignment(YamlMF.Alignment.valueOrOne()); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasWinCFI(YamlMF.HasWinCFI); @@ -691,7 +690,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, return error(Object.ID.SourceRange.Start, Twine("StackID is not supported by target")); MFI.setStackID(ObjectIdx, Object.StackID); - MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + MFI.setObjectAlignment(ObjectIdx, Object.Alignment.valueOrOne()); if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) .second) @@ -723,10 +722,11 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, return error(Object.ID.SourceRange.Start, Twine("StackID is not supported by target")); if (Object.Type == yaml::MachineStackObject::VariableSized) - ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); + ObjectIdx = + MFI.CreateVariableSizedObject(Object.Alignment.valueOrOne(), Alloca); else ObjectIdx = MFI.CreateStackObject( - Object.Size, Object.Alignment, + Object.Size, Object.Alignment.valueOrOne(), Object.Type == yaml::MachineStackObject::SpillSlot, Alloca, Object.StackID); MFI.setObjectOffset(ObjectIdx, Object.Offset); @@ -838,11 +838,11 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS, parseConstantValue(YamlConstant.Value.Value, Error, M)); if (!Value) return error(Error, YamlConstant.Value.SourceRange); - unsigned Alignment = - YamlConstant.Alignment - ?
YamlConstant.Alignment - : M.getDataLayout().getPrefTypeAlignment(Value->getType()); - unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); + const Align PrefTypeAlign = + M.getDataLayout().getPrefTypeAlign(Value->getType()); + const Align Alignment = YamlConstant.Alignment.getValueOr(PrefTypeAlign); + unsigned Index = + ConstantPool.getConstantPoolIndex(Value, Alignment.value()); if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) .second) return error(YamlConstant.ID.SourceRange.Start, diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 22f7e1644a48..58eb720ca799 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -198,7 +198,7 @@ void MIRPrinter::print(const MachineFunction &MF) { yaml::MachineFunction YamlMF; YamlMF.Name = MF.getName(); - YamlMF.Alignment = MF.getAlignment().value(); + YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); @@ -373,7 +373,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, : yaml::FixedMachineStackObject::DefaultType; YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); - YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.Alignment = MFI.getObjectAlign(I); YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); @@ -400,7 +400,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, : yaml::MachineStackObject::DefaultType; YamlObject.Offset = MFI.getObjectOffset(I); YamlObject.Size = MFI.getObjectSize(I); - YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.Alignment = MFI.getObjectAlign(I); YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I); YMF.StackObjects.push_back(YamlObject); @@ -514,7 +514,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, yaml::MachineConstantPoolValue YamlConstant; YamlConstant.ID = ID++; YamlConstant.Value = StrOS.str(); - YamlConstant.Alignment = Constant.getAlignment(); + YamlConstant.Alignment = MaybeAlign(Constant.getAlignment()); YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry(); MF.Constants.push_back(YamlConstant); diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index a5bea1463468..41b6de1441d7 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -466,7 +466,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, const MachineFunction &MF = *Before->getMF(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned NeedSize = TRI->getSpillSize(RC); - unsigned NeedAlign = TRI->getSpillAlignment(RC); + Align NeedAlign = TRI->getSpillAlign(RC); unsigned SI = Scavenged.size(), Diff = std::numeric_limits::max(); int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); @@ -478,7 +478,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, if (FI < FIB || FI >= FIE) continue; unsigned S = MFI.getObjectSize(FI); - unsigned A = MFI.getObjectAlignment(FI); + Align A = MFI.getObjectAlign(FI); if (NeedSize > S || NeedAlign > A) continue; // Avoid wasting slots with large size and/or large alignment. Pick one @@ -487,7 +487,7 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, // larger register is reserved before a slot for a smaller one. 
When // trying to spill a smaller register, the large slot would be found // first, thus making it impossible to spill the larger register later. - unsigned D = (S-NeedSize) + (A-NeedAlign); + unsigned D = (S - NeedSize) + (A.value() - NeedAlign.value()); if (D < Diff) { SI = I; Diff = D; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8dc1c538ee39..77fa5c793c8e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13109,8 +13109,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue DAGCombiner::visitFSQRT(SDNode *N) { SDNodeFlags Flags = N->getFlags(); - if (!DAG.getTarget().Options.UnsafeFPMath && - !Flags.hasApproximateFuncs()) + const TargetOptions &Options = DAG.getTarget().Options; + + // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as: + // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN + if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) || + (!Options.NoInfsFPMath && !Flags.hasNoInfs())) return SDValue(); SDValue N0 = N->getOperand(0); @@ -14629,11 +14633,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { + if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { + if (*Alignment > LD->getAlign() && + isAligned(*Alignment, LD->getSrcValueOffset())) { SDValue NewLoad = DAG.getExtLoad( LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), Align, + LD->getPointerInfo(), LD->getMemoryVT(), *Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); // NewLoad will always be N as we are only refining the alignment assert(NewLoad.getNode() == N); @@ -16695,11 +16700,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { + if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { + if (*Alignment > ST->getAlign() && + isAligned(*Alignment, ST->getSrcValueOffset())) { SDValue NewStore = DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), - ST->getMemoryVT(), Align, + ST->getMemoryVT(), *Alignment, ST->getMemOperand()->getFlags(), ST->getAAInfo()); // NewStore will always be N as we are only refining the alignment assert(NewStore.getNode() == N); @@ -19815,8 +19821,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *InnerSVN = cast(BC0); SmallVector InnerMask; SmallVector OuterMask; - scaleShuffleMask(InnerScale, InnerSVN->getMask(), InnerMask); - scaleShuffleMask(OuterScale, SVN->getMask(), OuterMask); + scaleShuffleMask(InnerScale, InnerSVN->getMask(), InnerMask); + scaleShuffleMask(OuterScale, SVN->getMask(), OuterMask); // Merge the shuffle masks. 
SmallVector NewMask; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0248b5121e3f..ed67f7dc8ea3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -278,8 +278,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, return Res.getValue(1); } - SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + // Op2 is used for the comparison and thus must be extended according to the + // target's atomic operations. Op3 is merely stored and so can be left alone. + SDValue Op2 = N->getOperand(2); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + switch (TLI.getExtendForAtomicCmpSwapArg()) { + case ISD::SIGN_EXTEND: + Op2 = SExtPromotedInteger(Op2); + break; + case ISD::ZERO_EXTEND: + Op2 = ZExtPromotedInteger(Op2); + break; + case ISD::ANY_EXTEND: + Op2 = GetPromotedInteger(Op2); + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } + SDVTList VTs = DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); SDValue Res = DAG.getAtomicCmpSwap( diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 98bbaefbb584..4c8e95e7b256 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5916,7 +5916,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - MaybeAlign SrcAlign(DAG.InferPtrAlignment(Src)); + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); if (!SrcAlign || Alignment > *SrcAlign) SrcAlign = Alignment; assert(SrcAlign && "SrcAlign must be set"); @@ -6101,7 +6101,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - MaybeAlign SrcAlign(DAG.InferPtrAlignment(Src)); + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); if (!SrcAlign || Alignment > *SrcAlign) SrcAlign = Alignment; assert(SrcAlign && "SrcAlign must be set"); @@ -6679,7 +6679,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef Ops, - EVT MemVT, MachinePointerInfo PtrInfo, unsigned Alignment, + EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { if (!Size && MemVT.isScalableVector()) Size = MemoryLocation::UnknownSize; @@ -6687,9 +6687,8 @@ SDValue SelectionDAG::getMemIntrinsicNode( Size = MemVT.getStoreSize(); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, Flags, Size, Alignment ? Align(Alignment) : getEVTAlign(MemVT), - AAInfo); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -9419,9 +9418,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } -/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if -/// it cannot be inferred. -unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { +/// InferPtrAlignment - Infer alignment of a load / store address. Return None +/// if it cannot be inferred. 
+MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV = nullptr; int64_t GVOffset = 0; @@ -9430,9 +9429,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { KnownBits Known(PtrWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); - unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; - if (Align) - return MinAlign(Align, GVOffset); + if (AlignBits) + return commonAlignment(Align(1 << std::min(31U, AlignBits)), GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -9450,12 +9448,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (FrameIdx != INT_MIN) { const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return FIInfoAlign; + return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); } - return 0; + return None; } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ed24e004f908..624ee71154f0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4713,10 +4713,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = DAG.getMemIntrinsicNode( - Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo); + Result = + DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -6529,12 +6529,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), Ops, - EVT::getIntegerVT(*Context, 8), - MachinePointerInfo(I.getArgOperand(0)), - 0, /* align */ - Flags); + SDValue Result = DAG.getMemIntrinsicNode( + ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, + EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), + /* align */ None, Flags); // Chain the prefetch in parallel with any pending loads, to stay out of // the way of later optimizations. @@ -7335,10 +7333,10 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); - unsigned DstAlign = DAG.InferPtrAlignment(Dst); - unsigned SrcAlign = DAG.InferPtrAlignment(Src); + Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne(); + Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne(); // DAG::getMemcpy needs Alignment to be defined.
- Align Alignment = assumeAligned(std::min(DstAlign, SrcAlign)); + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = false; SDLoc sdl = getCurSDLoc(); @@ -9494,16 +9492,13 @@ static void tryToElideArgumentCopy( "object size\n"); return; } - unsigned RequiredAlignment = AI->getAlignment(); - if (!RequiredAlignment) { - RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment( - AI->getAllocatedType()); - } - if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { + Align RequiredAlignment = AI->getAlign().getValueOr( + FuncInfo.MF->getDataLayout().getABITypeAlign(AI->getAllocatedType())); + if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) { LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " "greater than stack argument alignment (" - << RequiredAlignment << " vs " - << MFI.getObjectAlignment(FixedIndex) << ")\n"); + << RequiredAlignment.value() << " vs " + << MFI.getObjectAlign(FixedIndex).value() << ")\n"); return; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index dbbcf10be5a7..6626210e9185 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -3708,12 +3708,11 @@ bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { // Detect when "or" is used to add an offset to a stack object. if (auto *FN = dyn_cast(N->getOperand(0))) { MachineFrameInfo &MFI = MF->getFrameInfo(); - unsigned A = MFI.getObjectAlignment(FN->getIndex()); - assert(isPowerOf2_32(A) && "Unexpected alignment"); + Align A = MFI.getObjectAlign(FN->getIndex()); int32_t Off = C->getSExtValue(); // If the alleged offset fits in the zero bits guaranteed by // the alignment, then this or is really an add. - return (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); + return (Off >= 0) && (((A.value() - 1) & Off) == unsigned(Off)); } return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3f0c6443211e..e51555239054 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2762,9 +2762,9 @@ void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, unsigned Depth) const { assert(isa(Op) && "expected FrameIndex"); - if (unsigned Align = DAG.InferPtrAlignment(Op)) { + if (MaybeAlign Alignment = DAG.InferPtrAlign(Op)) { // The low bits are known zero if the pointer is aligned. 
- Known.Zero.setLowBits(Log2_32(Align)); + Known.Zero.setLowBits(Log2(*Alignment)); } } diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 9d4fdc6b624c..5ed5e8a46b08 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -1290,8 +1290,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { SortedSlots[J] = -1; LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #" << SecondSlot << " together.\n"); - unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), - MFI->getObjectAlignment(SecondSlot)); + Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot), + MFI->getObjectAlign(SecondSlot)); assert(MFI->getObjectSize(FirstSlot) >= MFI->getObjectSize(SecondSlot) && diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 7ae758323280..3cc5d30ebad7 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -74,7 +74,7 @@ namespace { SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs; // OrigAlignments - Alignments of stack objects before coloring. - SmallVector<unsigned, 16> OrigAlignments; + SmallVector<Align, 16> OrigAlignments; // OrigSizes - Sizes of stack objects before coloring. SmallVector<unsigned, 16> OrigSizes; @@ -227,7 +227,7 @@ void StackSlotColoring::InitializeSlots() { continue; SSIntervals.push_back(&li); - OrigAlignments[FI] = MFI->getObjectAlignment(FI); + OrigAlignments[FI] = MFI->getObjectAlign(FI); OrigSizes[FI] = MFI->getObjectSize(FI); auto StackID = MFI->getStackID(FI); @@ -309,9 +309,9 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Change size and alignment of the allocated slot. If there are multiple // objects sharing the same slot, then make sure the size and alignment // are large enough for all. - unsigned Align = OrigAlignments[FI]; - if (!Share || Align > MFI->getObjectAlignment(Color)) - MFI->setObjectAlignment(Color, Align); + Align Alignment = OrigAlignments[FI]; + if (!Share || Alignment > MFI->getObjectAlign(Color)) + MFI->setObjectAlignment(Color, Alignment); int64_t Size = OrigSizes[FI]; if (!Share || Size > MFI->getObjectSize(Color)) MFI->setObjectSize(Color, Size); diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index a98445a2295e..50f1ca3fe3df 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -349,8 +349,8 @@ Error StaticLibraryDefinitionGenerator::tryToGenerate( MemoryBufferRef ChildBufferRef(ChildBufferInfo.first, ChildBufferInfo.second); - if (auto Err = - L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef), VModuleKey())) + if (auto Err = L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef, false), + VModuleKey())) return Err; } diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 3be1381652a1..9a868a6fbac3 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -29,14 +29,6 @@ using namespace llvm::orc; namespace { -/// Add a reference to the __dso_handle global to the given module. -/// Returns a reference to the __dso_handle IR decl. -GlobalVariable *addDSOHandleDecl(Module &M) { - auto DSOHandleTy = StructType::create(M.getContext(), "lljit.dso_handle"); - return new GlobalVariable(M, DSOHandleTy, true, GlobalValue::ExternalLinkage, - nullptr, "__dso_handle"); - } - /// Adds helper function decls and wrapper functions that call the helper with /// some additional prefix arguments.
/// @@ -143,11 +135,10 @@ class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport { SymbolMap StdInterposes; StdInterposes[Mangle("__lljit.platform_support_instance")] = - JITEvaluatedSymbol(pointerToJITTargetAddress(this), JITSymbolFlags()); + JITEvaluatedSymbol(pointerToJITTargetAddress(this), + JITSymbolFlags::Exported); StdInterposes[Mangle("__lljit.cxa_atexit_helper")] = JITEvaluatedSymbol( pointerToJITTargetAddress(registerAtExitHelper), JITSymbolFlags()); - StdInterposes[Mangle("__lljit.run_atexits_helper")] = JITEvaluatedSymbol( - pointerToJITTargetAddress(runAtExitsHelper), JITSymbolFlags()); cantFail( J.getMainJITDylib().define(absoluteSymbols(std::move(StdInterposes)))); @@ -159,6 +150,14 @@ class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport { /// Adds a module that defines the __dso_handle global. Error setupJITDylib(JITDylib &JD) { + + // Add per-jitdylib standard interposes. + MangleAndInterner Mangle(getExecutionSession(), J.getDataLayout()); + SymbolMap PerJDInterposes; + PerJDInterposes[Mangle("__lljit.run_atexits_helper")] = JITEvaluatedSymbol( + pointerToJITTargetAddress(runAtExitsHelper), JITSymbolFlags()); + cantFail(JD.define(absoluteSymbols(std::move(PerJDInterposes)))); + auto Ctx = std::make_unique(); auto M = std::make_unique("__standard_lib", *Ctx); M->setDataLayout(J.getDataLayout()); @@ -168,9 +167,23 @@ class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport { *M, Int64Ty, true, GlobalValue::ExternalLinkage, ConstantInt::get(Int64Ty, reinterpret_cast(&JD)), "__dso_handle"); - DSOHandle->setVisibility(GlobalValue::HiddenVisibility); + DSOHandle->setVisibility(GlobalValue::DefaultVisibility); DSOHandle->setInitializer( ConstantInt::get(Int64Ty, pointerToJITTargetAddress(&JD))); + + auto *GenericIRPlatformSupportTy = + StructType::create(*Ctx, "lljit.GenericLLJITIRPlatformSupport"); + + auto *PlatformInstanceDecl = new GlobalVariable( + *M, GenericIRPlatformSupportTy, true, GlobalValue::ExternalLinkage, + nullptr, "__lljit.platform_support_instance"); + + auto *VoidTy = Type::getVoidTy(*Ctx); + addHelperAndWrapper( + *M, "__lljit_run_atexits", FunctionType::get(VoidTy, {}, false), + GlobalValue::HiddenVisibility, "__lljit.run_atexits_helper", + {PlatformInstanceDecl, DSOHandle}); + return J.addIRModule(JD, ThreadSafeModule(std::move(M), std::move(Ctx))); } @@ -316,6 +329,16 @@ class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport { } }); + LLVM_DEBUG({ + dbgs() << "JITDylib deinit order is [ "; + for (auto *JD : DFSLinkOrder) + dbgs() << "\"" << JD->getName() << "\" "; + dbgs() << "]\n"; + dbgs() << "Looking up deinit functions:\n"; + for (auto &KV : LookupSymbols) + dbgs() << " \"" << KV.first->getName() << "\": " << KV.second << "\n"; + }); + auto LookupResult = Platform::lookupInitSymbols(ES, LookupSymbols); if (!LookupResult) @@ -387,11 +410,19 @@ class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport { static void registerAtExitHelper(void *Self, void (*F)(void *), void *Ctx, void *DSOHandle) { + LLVM_DEBUG({ + dbgs() << "Registering atexit function " << (void *)F << " for JD " + << (*static_cast(DSOHandle))->getName() << "\n"; + }); static_cast(Self)->AtExitMgr.registerAtExit( F, Ctx, DSOHandle); } static void runAtExitsHelper(void *Self, void *DSOHandle) { + LLVM_DEBUG({ + dbgs() << "Running atexit functions for JD " + << (*static_cast(DSOHandle))->getName() << "\n"; + }); static_cast(Self)->AtExitMgr.runAtExits( DSOHandle); } @@ -410,8 +441,6 @@ class GenericLLVMIRPlatformSupport : public 
LLJIT::PlatformSupport { *M, GenericIRPlatformSupportTy, true, GlobalValue::ExternalLinkage, nullptr, "__lljit.platform_support_instance"); - auto *DSOHandleDecl = addDSOHandleDecl(*M); - auto *Int8Ty = Type::getInt8Ty(*Ctx); auto *IntTy = Type::getIntNTy(*Ctx, sizeof(int) * CHAR_BIT); auto *VoidTy = Type::getVoidTy(*Ctx); @@ -423,14 +452,9 @@ class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport { *M, "__cxa_atexit", FunctionType::get(IntTy, {AtExitCallbackPtrTy, BytePtrTy, BytePtrTy}, false), - GlobalValue::HiddenVisibility, "__lljit.cxa_atexit_helper", + GlobalValue::DefaultVisibility, "__lljit.cxa_atexit_helper", {PlatformInstanceDecl}); - addHelperAndWrapper( - *M, "__lljit_run_atexits", FunctionType::get(VoidTy, {}, false), - GlobalValue::HiddenVisibility, "__lljit.run_atexits_helper", - {PlatformInstanceDecl, DSOHandleDecl}); - return ThreadSafeModule(std::move(M), std::move(Ctx)); } @@ -676,7 +700,7 @@ class MachOPlatformSupport : public LLJIT::PlatformSupport { auto *DSOHandle = new GlobalVariable(M, Int64Ty, true, GlobalValue::ExternalLinkage, ConstantInt::get(Int64Ty, 0), "__dso_handle"); - DSOHandle->setVisibility(GlobalValue::HiddenVisibility); + DSOHandle->setVisibility(GlobalValue::DefaultVisibility); return cantFail(J.getIRCompileLayer().getCompiler()(M)); } diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index b8c57533568b..eabaaa203927 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -1196,6 +1196,10 @@ ConstantRange::binaryAnd(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); + // Use APInt's implementation of AND for single element ranges. + if (isSingleElement() && Other.isSingleElement()) + return {*getSingleElement() & *Other.getSingleElement()}; + // TODO: replace this with something less conservative APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()); @@ -1207,6 +1211,10 @@ ConstantRange::binaryOr(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); + // Use APInt's implementation of OR for single element ranges. 
+ if (isSingleElement() && Other.isSingleElement()) + return {*getSingleElement() | *Other.getSingleElement()}; + // TODO: replace this with something less conservative APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index a31a91766ebe..b0b9af9ff573 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4791,6 +4791,42 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Assert(Size % 16 == 0, "bswap must be an even number of bytes", &Call); break; } + case Intrinsic::matrix_multiply: + case Intrinsic::matrix_transpose: + case Intrinsic::matrix_columnwise_load: + case Intrinsic::matrix_columnwise_store: { + ConstantInt *NumRows; + ConstantInt *NumColumns; + VectorType *TypeToCheck; + switch (ID) { + case Intrinsic::matrix_multiply: + NumRows = cast<ConstantInt>(Call.getArgOperand(2)); + NumColumns = cast<ConstantInt>(Call.getArgOperand(4)); + TypeToCheck = cast<VectorType>(Call.getType()); + break; + case Intrinsic::matrix_transpose: + NumRows = cast<ConstantInt>(Call.getArgOperand(1)); + NumColumns = cast<ConstantInt>(Call.getArgOperand(2)); + TypeToCheck = cast<VectorType>(Call.getType()); + break; + case Intrinsic::matrix_columnwise_load: + NumRows = cast<ConstantInt>(Call.getArgOperand(2)); + NumColumns = cast<ConstantInt>(Call.getArgOperand(3)); + TypeToCheck = cast<VectorType>(Call.getType()); + break; + case Intrinsic::matrix_columnwise_store: + NumRows = cast<ConstantInt>(Call.getArgOperand(3)); + NumColumns = cast<ConstantInt>(Call.getArgOperand(4)); + TypeToCheck = cast<VectorType>(Call.getArgOperand(0)->getType()); + break; + default: + llvm_unreachable("unexpected intrinsic"); + } + Assert(TypeToCheck->getNumElements() == + NumRows->getZExtValue() * NumColumns->getZExtValue(), + "result of a matrix operation does not fit in the returned vector"); + break; + } }; } diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp index 098b3d8f8dd0..7879e2ef651c 100644 --- a/llvm/lib/Object/ObjectFile.cpp +++ b/llvm/lib/Object/ObjectFile.cpp @@ -108,14 +108,17 @@ Triple ObjectFile::makeTriple() const { setARMSubArch(TheTriple); // TheTriple defaults to ELF, and COFF doesn't have an environment: - // the best we can do here is indicate that it is mach-o. - if (isMachO()) + // something we can do here is indicate that it is mach-o. + if (isMachO()) { TheTriple.setObjectFormat(Triple::MachO); - - if (isCOFF()) { + } else if (isCOFF()) { const auto COFFObj = cast<COFFObjectFile>(this); if (COFFObj->getArch() == Triple::thumb) TheTriple.setTriple("thumbv7-windows"); + } else if (isXCOFF()) { + // XCOFF implies AIX.
+ TheTriple.setOS(Triple::AIX); + TheTriple.setObjectFormat(Triple::XCOFF); } return TheTriple; } diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index b48c377debda..71e21b90344d 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1065,10 +1065,13 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader, return; } - support::endian::write<uint32_t>(OS, Section.Bucket->size(), - ELFT::TargetEndianness); - support::endian::write<uint32_t>(OS, Section.Chain->size(), - ELFT::TargetEndianness); + support::endian::write<uint32_t>( + OS, Section.NBucket.getValueOr(llvm::yaml::Hex64(Section.Bucket->size())), + ELFT::TargetEndianness); + support::endian::write<uint32_t>( + OS, Section.NChain.getValueOr(llvm::yaml::Hex64(Section.Chain->size())), + ELFT::TargetEndianness); + for (uint32_t Val : *Section.Bucket) support::endian::write<uint32_t>(OS, Val, ELFT::TargetEndianness); for (uint32_t Val : *Section.Chain) support::endian::write<uint32_t>(OS, Val, ELFT::TargetEndianness); diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 7221d9b5736a..5adcb25dcec4 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1091,6 +1091,13 @@ static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) { IO.mapOptional("Bucket", Section.Bucket); IO.mapOptional("Chain", Section.Chain); IO.mapOptional("Size", Section.Size); + + // obj2yaml does not dump these fields. They can be used to override nchain + // and nbucket values for creating broken sections. + assert(!IO.outputting() || + (!Section.NBucket.hasValue() && !Section.NChain.hasValue())); + IO.mapOptional("NChain", Section.NChain); + IO.mapOptional("NBucket", Section.NBucket); } static void sectionMapping(IO &IO, ELFYAML::NoteSection &Section) { diff --git a/llvm/lib/Support/ELFAttributeParser.cpp b/llvm/lib/Support/ELFAttributeParser.cpp index 93be0535d1b9..df955cdf5d30 100644 --- a/llvm/lib/Support/ELFAttributeParser.cpp +++ b/llvm/lib/Support/ELFAttributeParser.cpp @@ -217,7 +217,7 @@ Error ELFAttributeParser::parse(ArrayRef<uint8_t> section, if (sectionLength < 4 || cursor.tell() - 4 + sectionLength > section.size()) return createStringError(errc::invalid_argument, - "invalid subsection length " + + "invalid section length " + Twine(sectionLength) + " at offset 0x" + utohexstr(cursor.tell() - 4)); diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc index dfe81d7e2833..a68b30a546c8 100644 --- a/llvm/lib/Support/Unix/Process.inc +++ b/llvm/lib/Support/Unix/Process.inc @@ -280,7 +280,7 @@ bool Process::FileDescriptorIsDisplayed(int fd) { #endif } -static unsigned getColumns(int FileID) { +static unsigned getColumns() { // If COLUMNS is defined in the environment, wrap to that many columns. if (const char *ColumnsStr = std::getenv("COLUMNS")) { int Columns = std::atoi(ColumnsStr); @@ -288,31 +288,23 @@ static unsigned getColumns(int FileID) { return Columns; } - unsigned Columns = 0; - -#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) \ - && !(defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE)) - // Try to determine the width of the terminal. - struct winsize ws; - if (ioctl(FileID, TIOCGWINSZ, &ws) == 0) - Columns = ws.ws_col; -#endif - - return Columns; + // We used to call ioctl TIOCGWINSZ to determine the width. It is considered + // unuseful.
+ return 0; } unsigned Process::StandardOutColumns() { if (!StandardOutIsDisplayed()) return 0; - return getColumns(1); + return getColumns(); } unsigned Process::StandardErrColumns() { if (!StandardErrIsDisplayed()) return 0; - return getColumns(2); + return getColumns(); } #ifdef HAVE_TERMINFO diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 10a540f8bfa6..17b13f6f96fb 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -216,6 +216,24 @@ AArch64FrameLowering::getStackIDForScalableVectors() const { return TargetStackID::SVEVector; } +/// Returns the size of the fixed object area (allocated next to sp on entry) +/// On Win64 this may include a var args area and an UnwindHelp object for EH. +static unsigned getFixedObjectSize(const MachineFunction &MF, + const AArch64FunctionInfo *AFI, bool IsWin64, + bool IsFunclet) { + if (!IsWin64 || IsFunclet) { + // Only Win64 uses fixed objects, and then only for the function (not + // funclets) + return 0; + } else { + // Var args are stored here in the primary function. + const unsigned VarArgsArea = AFI->getVarArgsGPRSize(); + // To support EH funclets we allocate an UnwindHelp object + const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0); + return alignTo(VarArgsArea + UnwindHelpObject, 16); + } +} + /// Returns the size of the entire SVE stackframe (calleesaves + spills). static StackOffset getSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo(); @@ -995,10 +1013,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - // Var args are accounted for in the containing function, so don't - // include them for funclets. - unsigned FixedObject = (IsWin64 && !IsFunclet) ? - alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. @@ -1029,32 +1044,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; } - // The code below is not applicable to funclets. We have emitted all the SEH - // opcodes that we needed to emit. The FP and BP belong to the containing - // function. - if (IsFunclet) { - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) - .setMIFlag(MachineInstr::FrameSetup); - } - - // SEH funclets are passed the frame pointer in X1. If the parent - // function uses the base register, then the base register is used - // directly, and is not retrieved from X1. - if (F.hasPersonalityFn()) { - EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); - if (isAsynchronousEHPersonality(Per)) { - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) - .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup); - MBB.addLiveIn(AArch64::X1); - } - } - - return; - } - - if (HasFP) { + // For funclets the FP belongs to the containing function. + if (!IsFunclet && HasFP) { // Only set up FP if we actually need to. int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; @@ -1197,7 +1188,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Allocate space for the rest of the frame. 
if (NumBytes) { - const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); + // Alignment is required for the parent frame, not the funclet + const bool NeedsRealignment = + !IsFunclet && RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NeedsRealignment) { @@ -1250,7 +1243,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. - if (RegInfo->hasBasePointer(MF)) { + // For funclets the BP belongs to the containing function. + if (!IsFunclet && RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); if (NeedsWinCFI) { @@ -1267,6 +1261,19 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + // SEH funclets are passed the frame pointer in X1. If the parent + // function uses the base register, then the base register is used + // directly, and is not retrieved from X1. + if (IsFunclet && F.hasPersonalityFn()) { + EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); + if (isAsynchronousEHPersonality(Per)) { + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) + .addReg(AArch64::X1) + .setMIFlag(MachineInstr::FrameSetup); + MBB.addLiveIn(AArch64::X1); + } + } + if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); const int StackGrowth = isTargetDarwin(MF) @@ -1485,10 +1492,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - // Var args are accounted for in the containing function, so don't - // include them for funclets. - unsigned FixedObject = - (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; @@ -1714,7 +1718,9 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) const auto &Subtarget = MF.getSubtarget(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + + unsigned FixedObject = + getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo()); return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; @@ -2093,8 +2099,8 @@ static void computeCalleeSaveRegisterPairs( FixupDone = true; ByteOffset -= 8; assert(ByteOffset % 16 == 0); - assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16); - MFI.setObjectAlignment(RPI.FrameIdx, 16); + assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16)); + MFI.setObjectAlignment(RPI.FrameIdx, Align(16)); } int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset; @@ -2584,12 +2590,12 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, // Then process all callee saved slots. if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) { // Make sure to align the last callee save slot. - MFI.setObjectAlignment(MaxCSFrameIndex, 16U); + MFI.setObjectAlignment(MaxCSFrameIndex, Align(16)); // Assign offsets to the callee save slots. 
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) { Offset += MFI.getObjectSize(I); - Offset = alignTo(Offset, MFI.getObjectAlignment(I)); + Offset = alignTo(Offset, MFI.getObjectAlign(I)); if (AssignOffsets) Assign(I, -Offset); } @@ -2611,15 +2617,15 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, // Allocate all SVE locals and spills for (unsigned FI : ObjectsToAllocate) { - unsigned Align = MFI.getObjectAlignment(FI); + Align Alignment = MFI.getObjectAlign(FI); // FIXME: Given that the length of SVE vectors is not necessarily a power of // two, we'd need to align every object dynamically at runtime if the // alignment is larger than 16. This is not yet supported. - if (Align > 16) + if (Alignment > Align(16)) report_fatal_error( "Alignment of scalable vectors > 16 bytes is not yet supported"); - Offset = alignTo(Offset + MFI.getObjectSize(FI), Align); + Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment); if (AssignOffsets) Assign(FI, -Offset); } @@ -2667,9 +2673,14 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( ++MBBI; // Create an UnwindHelp object. - int UnwindHelpFI = - MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); + // The UnwindHelp object is allocated at the start of the fixed object area + int64_t FixedObject = + getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false); + int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8, + /*SPOffset*/ -FixedObject, + /*IsImmutable=*/false); EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; + // We need to store -2 into the UnwindHelp object at the start of the // function. DebugLoc DL; @@ -3081,10 +3092,14 @@ int AArch64FrameLowering::getFrameIndexReferencePreferSP( const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " - << MFI.getObjectOffset(FI) << "\n"); - FrameReg = AArch64::SP; - return MFI.getObjectOffset(FI); + if (IgnoreSPUpdates) { + LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " + << MFI.getObjectOffset(FI) << "\n"); + FrameReg = AArch64::SP; + return MFI.getObjectOffset(FI); + } + + return getFrameIndexReference(MF, FI, FrameReg); } /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 698189e14c21..61b78acad3f4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -55,6 +55,9 @@ struct ImageDimIntrinsicInfo { }; const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr); +const ImageDimIntrinsicInfo *getImageDimInstrinsicByBaseOpcode(unsigned BaseOpcode, + unsigned Dim); + } // end AMDGPU namespace } // End llvm namespace diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 9e1fb426116b..c9b065cdd8d6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1340,7 +1340,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( } // TODO: Check this in verifier. 
- assert(!IsTexFail || DMaskLanes >= 1 && "should have legalized this"); + assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this"); bool GLC = false; bool SLC = false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index c88569604227..5ee3267822b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2075,7 +2075,7 @@ bool AMDGPULegalizerInfo::legalizeFMad( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setMBB(*MI.getParent()); + HelperBuilder.setInstr(MI); return Helper.lowerFMad(MI) == LegalizerHelper::Legalized; } @@ -3722,27 +3722,32 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( const ConstantFP *ConstantLod; const int LodIdx = AddrIdx + NumVAddrs - 1; - // FIXME: This isn't the cleanest way to handle this, but it's the easiest - // option the current infrastructure gives. We really should be changing the - // base intrinsic opcode, but the current searchable tables only gives us - // the final MI opcode. Eliminate the register here, and track with an - // immediate 0 so the final selection will know to do the opcode change. if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_GFCst(ConstantLod))) { if (ConstantLod->isZero() || ConstantLod->isNegative()) { - MI.getOperand(LodIdx).ChangeToImmediate(0); + // Set new opcode to _lz variant of _l, and change the intrinsic ID. + ImageDimIntr = AMDGPU::getImageDimInstrinsicByBaseOpcode( + LZMappingInfo->LZ, ImageDimIntr->Dim); + + // The starting indexes should remain in the same place. + --NumVAddrs; --CorrectedNumVAddrs; + + MI.getOperand(MI.getNumExplicitDefs()).setIntrinsicID( + static_cast<Intrinsic::ID>(ImageDimIntr->Intr)); + MI.RemoveOperand(LodIdx); } } } // Optimize _mip away, when 'lod' is zero - if (const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo = - AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { + if (AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { int64_t ConstantLod; const int LodIdx = AddrIdx + NumVAddrs - 1; if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_ICst(ConstantLod))) { if (ConstantLod == 0) { + // TODO: Change intrinsic opcode and remove operand instead of replacing + // it with 0, as the _L to _LZ handling is done above. MI.getOperand(LodIdx).ChangeToImmediate(0); --CorrectedNumVAddrs; } diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 8a439425062e..c4dbb65ee5b8 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -817,6 +817,11 @@ def ImageDimIntrinsicTable : GenericTable { let PrimaryKeyEarlyOut = 1; } +def getImageDimInstrinsicByBaseOpcode : SearchIndex { + let Table = ImageDimIntrinsicTable; + let Key = ["BaseOpcode", "Dim"]; +} + foreach intr = !listconcat(AMDGPUImageDimIntrinsics, AMDGPUImageDimAtomicIntrinsics) in { def : ImageDimIntrinsicInfo<intr>; } diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp index d9aa9ebe878d..d2fe3c9f93c6 100644 --- a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -35,15 +35,15 @@ int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF, int UpperBound = FI == -1 ?
MFI.getNumObjects() : FI; for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) { - OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i)); + OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlign(i)); OffsetBytes += MFI.getObjectSize(i); // Each register holds 4 bytes, so we must always align the offset to at // least 4 bytes, so that 2 frame objects won't share the same register. - OffsetBytes = alignTo(OffsetBytes, 4); + OffsetBytes = alignTo(OffsetBytes, Align(4)); } if (FI != -1) - OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI)); + OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlign(FI)); return OffsetBytes / (getStackWidth(MF) * 4); } diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 8a3dea8152f1..3f36150d2a3c 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -503,16 +503,16 @@ bool isUncondBranchOpcode(int Opc) { // the ArmARM. -inline static unsigned getARMVPTBlockMask(unsigned NumInsts) { +inline static ARM::PredBlockMask getARMVPTBlockMask(unsigned NumInsts) { switch (NumInsts) { case 1: - return ARMVCC::T; + return ARM::PredBlockMask::T; case 2: - return ARMVCC::TT; + return ARM::PredBlockMask::TT; case 3: - return ARMVCC::TTT; + return ARM::PredBlockMask::TTT; case 4: - return ARMVCC::TTTT; + return ARM::PredBlockMask::TTTT; default: break; }; diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index f424f22464e5..3c6f446580bb 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1191,8 +1191,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, // Only multiples of 4 are allowed for the offset, so the frame object // alignment must be at least 4. MachineFrameInfo &MFI = MF->getFrameInfo(); - if (MFI.getObjectAlignment(FI) < 4) - MFI.setObjectAlignment(FI, 4); + if (MFI.getObjectAlign(FI) < Align(4)) + MFI.setObjectAlignment(FI, Align(4)); Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); @@ -1215,9 +1215,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, if (RHSC * 4 < MFI.getObjectSize(FI)) { // For LHS+RHS to result in an offset that's a multiple of 4 the object // indexed by the LHS must be 4-byte aligned. 
- if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4) - MFI.setObjectAlignment(FI, 4); - if (MFI.getObjectAlignment(FI) >= 4) { + if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) + MFI.setObjectAlignment(FI, Align(4)); + if (MFI.getObjectAlign(FI) >= Align(4)) { Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); @@ -3420,8 +3420,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Set the alignment of the frame object to 4, to avoid having to generate // more than one ADD MachineFrameInfo &MFI = MF->getFrameInfo(); - if (MFI.getObjectAlignment(FI) < 4) - MFI.setObjectAlignment(FI, 4); + if (MFI.getObjectAlign(FI) < Align(4)) + MFI.setObjectAlignment(FI, Align(4)); CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, CurDAG->getTargetConstant(0, dl, MVT::i32)); return; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 6d03126b2db0..860dfce691a3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2272,9 +2272,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, - MachinePointerInfo(), - DAG.InferPtrAlignment(AddArg)); + SDValue Load = + DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), + DAG.InferPtrAlign(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index b9572ef3e9d0..4ccf62759f5a 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1198,7 +1198,7 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { if (isVCTP(Divergent->MI)) { // The vctp will be removed, so the size of the vpt block needs to be // modified. - uint64_t Size = getARMVPTBlockMask(Block.size() - 1); + uint64_t Size = (uint64_t)getARMVPTBlockMask(Block.size() - 1); Block.getVPST()->getOperand(0).setImm(Size); LLVM_DEBUG(dbgs() << "ARM Loops: Modified VPT block mask.\n"); } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) { @@ -1227,7 +1227,7 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt, InsertAt->getDebugLoc(), TII->get(ARM::MVE_VPST)); - MIB.addImm(getARMVPTBlockMask(Size)); + MIB.addImm((uint64_t)getARMVPTBlockMask(Size)); LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST()); LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB); LoLoop.ToRemove.insert(Block.getVPST()); diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 80450983e513..9304d8339193 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6980,6 +6980,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // ITx -> x100 (ITT -> 0100, ITE -> 1100) // ITxy -> xy10 (e.g. ITET -> 1010) // ITxyz -> xyz1 (e.g. 
ITEET -> 1101) + // Note: See the ARM::PredBlockMask enum in + // /lib/Target/ARM/Utils/ARMBaseInfo.h if (Mnemonic == "it" || Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst")) { SMLoc Loc = Mnemonic == "it" ? SMLoc::getFromPointer(NameLoc.getPointer() + 2) : diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index c8b725f339e2..2de9829480db 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -34,30 +34,30 @@ using namespace llvm; #define DEBUG_TYPE "arm-mve-vpt" namespace { - class MVEVPTBlock : public MachineFunctionPass { - public: - static char ID; - const Thumb2InstrInfo *TII; - const TargetRegisterInfo *TRI; +class MVEVPTBlock : public MachineFunctionPass { +public: + static char ID; + const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; - MVEVPTBlock() : MachineFunctionPass(ID) {} + MVEVPTBlock() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &Fn) override; + bool runOnMachineFunction(MachineFunction &Fn) override; - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } - StringRef getPassName() const override { - return "MVE VPT block insertion pass"; - } + StringRef getPassName() const override { + return "MVE VPT block insertion pass"; + } - private: - bool InsertVPTBlocks(MachineBasicBlock &MBB); - }; +private: + bool InsertVPTBlocks(MachineBasicBlock &MBB); +}; - char MVEVPTBlock::ID = 0; +char MVEVPTBlock::ID = 0; } // end anonymous namespace @@ -94,24 +94,184 @@ static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, return &*CmpMI; } +static ARM::PredBlockMask ExpandBlockMask(ARM::PredBlockMask BlockMask, + ARMVCC::VPTCodes Kind) { + using PredBlockMask = ARM::PredBlockMask; + assert(Kind != ARMVCC::None && "Cannot expand mask with 'None'"); + assert(countTrailingZeros((unsigned)BlockMask) != 0 && + "Mask is already full"); + + auto ChooseMask = [&](PredBlockMask AddedThen, PredBlockMask AddedElse) { + return (Kind == ARMVCC::Then) ? AddedThen : AddedElse; + }; + + switch (BlockMask) { + case PredBlockMask::T: + return ChooseMask(PredBlockMask::TT, PredBlockMask::TE); + case PredBlockMask::TT: + return ChooseMask(PredBlockMask::TTT, PredBlockMask::TTE); + case PredBlockMask::TE: + return ChooseMask(PredBlockMask::TET, PredBlockMask::TEE); + case PredBlockMask::TTT: + return ChooseMask(PredBlockMask::TTTT, PredBlockMask::TTTE); + case PredBlockMask::TTE: + return ChooseMask(PredBlockMask::TTET, PredBlockMask::TTEE); + case PredBlockMask::TET: + return ChooseMask(PredBlockMask::TETT, PredBlockMask::TETE); + case PredBlockMask::TEE: + return ChooseMask(PredBlockMask::TEET, PredBlockMask::TEEE); + default: + llvm_unreachable("Unknown Mask"); + } +} + +// Advances Iter past a block of predicated instructions. +// Returns true if it successfully skipped the whole block of predicated +// instructions. Returns false when it stopped early (due to MaxSteps), or if +// Iter didn't point to a predicated instruction. 
+static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, + MachineBasicBlock::instr_iterator EndIter, + unsigned MaxSteps, + unsigned &NumInstrsSteppedOver) { + ARMVCC::VPTCodes NextPred = ARMVCC::None; + unsigned PredReg; + NumInstrsSteppedOver = 0; + + while (Iter != EndIter) { + NextPred = getVPTInstrPredicate(*Iter, PredReg); + assert(NextPred != ARMVCC::Else && + "VPT block pass does not expect Else preds"); + if (NextPred == ARMVCC::None || MaxSteps == 0) + break; + --MaxSteps; + ++Iter; + ++NumInstrsSteppedOver; + }; + + return NumInstrsSteppedOver != 0 && + (NextPred == ARMVCC::None || Iter == EndIter); +} + +// Returns true if at least one instruction in the range [Iter, End) defines +// or kills VPR. +static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, + MachineBasicBlock::iterator End) { + for (; Iter != End; ++Iter) + if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR)) + return true; + return false; +} + +// Given an iterator (Iter) that points at an instruction with a "Then" +// predicate, tries to create the largest block of continuous predicated +// instructions possible, and returns the VPT Block Mask of that block. +// +// This will try to perform some minor optimization in order to maximize the +// size of the block. +static ARM::PredBlockMask +CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, + MachineBasicBlock::instr_iterator EndIter, + SmallVectorImpl &DeadInstructions) { + MachineBasicBlock::instr_iterator BlockBeg = Iter; + (void)BlockBeg; + assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && + "Expected a Predicated Instruction"); + + LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump()); + + unsigned BlockSize; + StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize); + + LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = + std::next(BlockBeg); + AddedInstIter != Iter; ++AddedInstIter) { + dbgs() << " adding: "; + AddedInstIter->dump(); + }); + + // Generate the initial BlockMask + ARM::PredBlockMask BlockMask = getARMVPTBlockMask(BlockSize); + + // Remove VPNOTs while there's still room in the block, so we can make the + // largest block possible. + ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then; + while (BlockSize < 4 && Iter != EndIter && + Iter->getOpcode() == ARM::MVE_VPNOT) { + + // Try to skip all of the predicated instructions after the VPNOT, stopping + // after (4 - BlockSize). If we can't skip them all, stop. + unsigned ElseInstCnt = 0; + MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter); + if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize), + ElseInstCnt)) + break; + + // Check if this VPNOT can be removed or not: It can only be removed if at + // least one of the predicated instruction that follows it kills or sets + // VPR. + if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) + break; + + LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump();); + + // Record the new size of the block + BlockSize += ElseInstCnt; + assert(BlockSize <= 4 && "Block is too large!"); + + // Record the VPNot to remove it later. + DeadInstructions.push_back(&*Iter); + ++Iter; + + // Replace "then" by "elses" in the block until we find an instruction that + // defines VPR, then after that leave everything to "t". + // Note that we are using "Iter" to iterate over the block so we can update + // it at the same time. 
+    bool ChangeToElse = (CurrentPredicate == ARMVCC::Then);
+    for (; Iter != VPNOTBlockEndIter; ++Iter) {
+      // Find the operand that holds the predicate.
+      int OpIdx = findFirstVPTPredOperandIdx(*Iter);
+      assert(OpIdx != -1);
+
+      // Update the mask + change the predicate to an else if needed.
+      if (ChangeToElse) {
+        // Change the predicate and update the mask.
+        Iter->getOperand(OpIdx).setImm(ARMVCC::Else);
+        BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Else);
+        // Reset back to a "then" predicate if this instruction defines VPR.
+        if (Iter->definesRegister(ARM::VPR))
+          ChangeToElse = false;
+      } else
+        BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Then);
+
+      LLVM_DEBUG(dbgs() << "  adding: "; Iter->dump());
+    }
+
+    CurrentPredicate =
+        (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
+  }
+  return BlockMask;
+}
+
 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
   bool Modified = false;
   MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
   MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
 
+  SmallVector<MachineInstr *, 4> DeadInstructions;
+
   while (MBIter != EndIter) {
     MachineInstr *MI = &*MBIter;
     unsigned PredReg = 0;
-    DebugLoc dl = MI->getDebugLoc();
+    DebugLoc DL = MI->getDebugLoc();
     ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
 
     // The idea of the predicate is that None, Then and Else are for use when
     // handling assembly language: they correspond to the three possible
     // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
-    // from assembly source or disassembled from object code, you expect to see
-    // a mixture whenever there's a long VPT block. But in code generation, we
-    // hope we'll never generate an Else as input to this pass.
+    // from assembly source or disassembled from object code, you expect to
+    // see a mixture whenever there's a long VPT block. But in code
+    // generation, we hope we'll never generate an Else as input to this pass.
     assert(Pred != ARMVCC::Else &&
            "VPT block pass does not expect Else preds");
 
     if (Pred == ARMVCC::None) {
@@ -119,42 +279,25 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
       continue;
     }
 
-    LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
-    int VPTInstCnt = 1;
-    ARMVCC::VPTCodes NextPred;
-
-    // Look at subsequent instructions, checking if they can be in the same VPT
-    // block.
- ++MBIter; - while (MBIter != EndIter && VPTInstCnt < 4) { - NextPred = getVPTInstrPredicate(*MBIter, PredReg); - assert(NextPred != ARMVCC::Else && - "VPT block pass does not expect Else preds"); - if (NextPred != Pred) - break; - LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); - ++VPTInstCnt; - ++MBIter; - }; - - unsigned BlockMask = getARMVPTBlockMask(VPTInstCnt); + ARM::PredBlockMask BlockMask = + CreateVPTBlock(MBIter, EndIter, DeadInstructions); - // Search back for a VCMP that can be folded to create a VPT, or else create - // a VPST directly + // Search back for a VCMP that can be folded to create a VPT, or else + // create a VPST directly MachineInstrBuilder MIBuilder; unsigned NewOpcode; - MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); - if (VCMP) { + LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n"); + if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); - MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); - MIBuilder.addImm(BlockMask); + MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); + MIBuilder.addImm((uint64_t)BlockMask); MIBuilder.add(VCMP->getOperand(1)); MIBuilder.add(VCMP->getOperand(2)); MIBuilder.add(VCMP->getOperand(3)); VCMP->eraseFromParent(); } else { - MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); - MIBuilder.addImm(BlockMask); + MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); + MIBuilder.addImm((uint64_t)BlockMask); } finalizeBundle( @@ -162,6 +305,15 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { Modified = true; } + + // Erase all dead instructions + for (MachineInstr *DeadMI : DeadInstructions) { + if (DeadMI->isInsideBundle()) + DeadMI->eraseFromBundle(); + else + DeadMI->eraseFromParent(); + } + return Modified; } diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 3e8e77a9db1f..583a09163f4e 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -74,6 +74,10 @@ ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg); int findFirstVPTPredOperandIdx(const MachineInstr &MI); ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, unsigned &PredReg); +inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) { + unsigned PredReg; + return getVPTInstrPredicate(MI, PredReg); } +} // namespace llvm #endif diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h index a7578ee172ca..b2e434fbd78a 100644 --- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h +++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h @@ -91,25 +91,35 @@ namespace ARMVCC { Then, Else }; - - enum VPTMaskValue { - T = 8, // 0b1000 - TT = 4, // 0b0100 - TE = 12, // 0b1100 - TTT = 2, // 0b0010 - TTE = 6, // 0b0110 - TEE = 10, // 0b1010 - TET = 14, // 0b1110 - TTTT = 1, // 0b0001 - TTTE = 3, // 0b0011 - TTEE = 5, // 0b0101 - TTET = 7, // 0b0111 - TEEE = 9, // 0b1001 - TEET = 11, // 0b1011 - TETT = 13, // 0b1101 - TETE = 15 // 0b1111 +} // namespace ARMVCC + +namespace ARM { + /// Mask values for IT and VPT Blocks, to be used by MCOperands. + /// Note that this is different from the "real" encoding used by the + /// instructions. In this encoding, the lowest set bit indicates the end of + /// the encoding, and above that, "1" indicates an else, while "0" indicates + /// a then. 
+ /// Tx = x100 + /// Txy = xy10 + /// Txyz = xyz1 + enum class PredBlockMask { + T = 0b1000, + TT = 0b0100, + TE = 0b1100, + TTT = 0b0010, + TTE = 0b0110, + TEE = 0b1110, + TET = 0b1010, + TTTT = 0b0001, + TTTE = 0b0011, + TTEE = 0b0111, + TTET = 0b0101, + TEEE = 0b1111, + TEET = 0b1101, + TETT = 0b1001, + TETE = 0b1011 }; -} +} // namespace ARM inline static const char *ARMVPTPredToString(ARMVCC::VPTCodes CC) { switch (CC) { diff --git a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index ebd060ce503e..1e4030b84bc1 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -330,7 +330,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case PS_fi: { int FI = op(1).getIndex(); int Off = op(2).getImm(); - unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off); + unsigned A = MFI.getObjectAlign(FI).value() + std::abs(Off); unsigned L = countTrailingZeros(A); RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); RC.fill(0, L, BT::BitValue::Zero); diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 9c39d0bf844f..a9cfbdc3c6fc 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -645,15 +645,15 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, auto &HMFI = *MF.getInfo(); for (int i = HMFI.getFirstNamedArgFrameIndex(), e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) { - int ObjSize = MFI.getObjectSize(i); - int ObjAlign = MFI.getObjectAlignment(i); + uint64_t ObjSize = MFI.getObjectSize(i); + Align ObjAlign = MFI.getObjectAlign(i); // Determine the kind of load/store that should be used. unsigned LDOpc, STOpc; - int OpcodeChecker = ObjAlign; + uint64_t OpcodeChecker = ObjAlign.value(); // Handle cases where alignment of an object is > its size. - if (ObjSize < ObjAlign) { + if (ObjAlign > ObjSize) { if (ObjSize <= 1) OpcodeChecker = 1; else if (ObjSize <= 2) @@ -702,17 +702,17 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, while (Count < LoadStoreCount) { // Load the value of the named argument on stack. BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed) - .addReg(SP) - .addImm(RegisterSavedAreaSizePlusPadding + - ObjAlign * Count + NumBytes) - .setMIFlag(MachineInstr::FrameSetup); + .addReg(SP) + .addImm(RegisterSavedAreaSizePlusPadding + + ObjAlign.value() * Count + NumBytes) + .setMIFlag(MachineInstr::FrameSetup); // Store it below the register saved area plus padding. BuildMI(MBB, InsertPt, dl, HII.get(STOpc)) - .addReg(SP) - .addImm(ObjAlign * Count + NumBytes) - .addReg(RegUsed) - .setMIFlag(MachineInstr::FrameSetup); + .addReg(SP) + .addImm(ObjAlign.value() * Count + NumBytes) + .addReg(RegUsed) + .setMIFlag(MachineInstr::FrameSetup); Count++; } @@ -1520,8 +1520,8 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( unsigned S = MFI.getObjectSize(i); // Reduce the alignment to at most 8. This will require unaligned vector // stores if they happen here. 
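// A hedged note on the unsigned -> Align migration applied throughout this
// patch: llvm::Align wraps a power-of-two byte count, so comparisons order by
// alignment strength and helpers replace hand-rolled arithmetic, e.g.
// (assuming the current llvm/Support/Alignment.h API):
//   Align(8).value() == 8
//   std::max(Align(4), Align(8)) == Align(8)
//   commonAlignment(Align(16), /*Offset=*/4) == Align(4)  // gcd-style
//   isAligned(Align(8), 24) == true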
- unsigned A = std::max(MFI.getObjectAlignment(i), 8U); - MFI.setObjectAlignment(i, 8); + Align A = std::max(MFI.getObjectAlign(i), Align(8)); + MFI.setObjectAlignment(i, Align(8)); LFS = alignTo(LFS+S, A); MFI.mapLocalFrameObject(i, -static_cast(LFS)); DealignSlots.insert(i); @@ -1934,11 +1934,11 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, bool NeedsAligna = needsAligna(MF); unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); - unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); - unsigned HasAlign = MFI.getObjectAlignment(FI); + Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass); + Align HasAlign = MFI.getObjectAlign(FI); unsigned StoreOpc; - auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) { + auto UseAligned = [&](Align NeedAlign, Align HasAlign) { return !NeedsAligna && (NeedAlign <= HasAlign); }; @@ -1986,11 +1986,11 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, bool NeedsAligna = needsAligna(MF); unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); - unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); - unsigned HasAlign = MFI.getObjectAlignment(FI); + Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass); + Align HasAlign = MFI.getObjectAlign(FI); unsigned LoadOpc; - auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) { + auto UseAligned = [&](Align NeedAlign, Align HasAlign) { return !NeedsAligna && (NeedAlign <= HasAlign); }; @@ -2030,8 +2030,8 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); - unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); - unsigned HasAlign = MFI.getObjectAlignment(FI); + Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass); + Align HasAlign = MFI.getObjectAlign(FI); bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign); unsigned StoreOpc = UseAligned ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai; @@ -2060,8 +2060,8 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, Register DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); - unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); - unsigned HasAlign = MFI.getObjectAlignment(FI); + Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass); + Align HasAlign = MFI.getObjectAlign(FI); bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign); unsigned LoadOpc = UseAligned ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32Ub_ai; diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 1d1744a49b3a..08967a534bf9 100644 --- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -266,7 +266,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n" << "alignment : " - << MF.getFrameInfo().getObjectAlignment(FrameIndex) + << MF.getFrameInfo().getObjectAlign(FrameIndex).value() << "\n"); eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 83074ca22768..2ef583c0a799 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -218,11 +218,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, // covered by the vector op. Otherwise, it returns 1. 
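// For example (a hedged reading of the contract above): four i32 pieces at
// Offsets {0, 4, 8, 12} with ParamAlignment == Align(16) allow a single
// 16-byte access at Idx == 0, so the function would return 4; with
// ParamAlignment == Align(4) the alignment check fails and it returns 1.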
static unsigned CanMergeParamLoadStoresStartingAt( unsigned Idx, uint32_t AccessSize, const SmallVectorImpl &ValueVTs, - const SmallVectorImpl &Offsets, unsigned ParamAlignment) { - assert(isPowerOf2_32(AccessSize) && "must be a power of 2!"); + const SmallVectorImpl &Offsets, Align ParamAlignment) { // Can't vectorize if param alignment is not sufficient. - if (AccessSize > ParamAlignment) + if (ParamAlignment < AccessSize) return 1; // Can't vectorize if offset is not aligned. if (Offsets[Idx] & (AccessSize - 1)) @@ -282,7 +281,7 @@ enum ParamVectorizationFlags { static SmallVector VectorizePTXValueVTs(const SmallVectorImpl &ValueVTs, const SmallVectorImpl &Offsets, - unsigned ParamAlignment) { + Align ParamAlignment) { // Set vector size to match ValueVTs and mark all elements as // scalars by default. SmallVector VectorInfo; @@ -1243,7 +1242,7 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { std::string NVPTXTargetLowering::getPrototype( const DataLayout &DL, Type *retTy, const ArgListTy &Args, - const SmallVectorImpl &Outs, unsigned retAlignment, + const SmallVectorImpl &Outs, MaybeAlign retAlignment, ImmutableCallSite CS) const { auto PtrVT = getPointerTy(DL); @@ -1279,8 +1278,8 @@ std::string NVPTXTargetLowering::getPrototype( O << ".param .b" << PtrVT.getSizeInBits() << " _"; } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) { - O << ".param .align " << retAlignment << " .b8 _[" - << DL.getTypeAllocSize(retTy) << "]"; + O << ".param .align " << (retAlignment ? retAlignment->value() : 0) + << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]"; } else { llvm_unreachable("Unknown return type"); } @@ -1353,16 +1352,16 @@ std::string NVPTXTargetLowering::getPrototype( return O.str(); } -unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, - ImmutableCallSite CS, - Type *Ty, unsigned Idx, - const DataLayout &DL) const { +Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, + ImmutableCallSite CS, Type *Ty, + unsigned Idx, + const DataLayout &DL) const { if (!CS) { // CallSite is zero, fallback to ABI type alignment - return DL.getABITypeAlignment(Ty); + return DL.getABITypeAlign(Ty); } - unsigned Align = 0; + unsigned Alignment = 0; const Value *DirectCallee = CS.getCalledFunction(); if (!DirectCallee) { @@ -1374,8 +1373,8 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // With bitcast'd call targets, the instruction will be the call if (isa(CalleeI)) { // Check if we have call alignment metadata - if (getAlign(*cast(CalleeI), Idx, Align)) - return Align; + if (getAlign(*cast(CalleeI), Idx, Alignment)) + return Align(Alignment); const Value *CalleeV = cast(CalleeI)->getCalledValue(); // Ignore any bitcast instructions @@ -1397,12 +1396,12 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // Check for function alignment information if we found that the // ultimate target is a Function if (DirectCallee) - if (getAlign(*cast(DirectCallee), Idx, Align)) - return Align; + if (getAlign(*cast(DirectCallee), Idx, Alignment)) + return Align(Alignment); // Call is indirect or alignment information is not available, fall back to // the ABI type alignment - return DL.getABITypeAlignment(Ty); + return DL.getABITypeAlign(Ty); } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -1450,15 +1449,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector VTs; SmallVector Offsets; ComputePTXValueVTs(*this, DL, 
Ty, VTs, &Offsets); - unsigned ArgAlign = - getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); + Align ArgAlign = getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); unsigned AllocSize = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); bool NeedAlign; // Does argument declaration specify alignment? if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { // declare .param .align .b8 .param[]; SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), + Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), DAG.getConstant(paramCount, dl, MVT::i32), DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, @@ -1539,8 +1537,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Adjust type of the store op if we've extended the scalar // return value. EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; - unsigned EltAlign = - NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0; + MaybeAlign EltAlign; + if (NeedAlign) + EltAlign = commonAlignment(ArgAlign, Offsets[j]); Chain = DAG.getMemIntrinsicNode( Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, @@ -1604,10 +1603,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(paramCount, dl, MVT::i32), DAG.getConstant(curOffset, dl, MVT::i32), theVal, InFlag }; - Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, - CopyParamOps, elemtype, - MachinePointerInfo(), /* Align */ 0, - MachineMemOperand::MOStore); + Chain = DAG.getMemIntrinsicNode( + NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, + MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore); InFlag = Chain.getValue(1); } @@ -1615,7 +1613,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); - unsigned retAlignment = 0; + MaybeAlign retAlignment = None; // Handle Result if (Ins.size() > 0) { @@ -1644,11 +1642,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); } else { retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + assert(retAlignment && "retAlignment is guaranteed to be set"); SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, - DAG.getConstant(retAlignment, dl, MVT::i32), - DAG.getConstant(resultsz / 8, dl, MVT::i32), - DAG.getConstant(0, dl, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { + Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), + DAG.getConstant(resultsz / 8, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps); InFlag = Chain.getValue(1); @@ -1754,7 +1753,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); assert(VTs.size() == Ins.size() && "Bad value decomposition"); - unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + Align RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL); auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); SmallVector LoadVTs; @@ -1770,7 +1769,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool needTruncate = false; EVT TheLoadType = VTs[i]; EVT EltType = Ins[i].VT; - unsigned EltAlign = 
GreatestCommonDivisor64(RetAlign, Offsets[i]); + Align EltAlign = commonAlignment(RetAlign, Offsets[i]); if (ExtendIntegerRetVal) { TheLoadType = MVT::i32; EltType = MVT::i32; @@ -2545,7 +2544,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); assert(VTs.size() > 0 && "Unexpected empty type."); auto VectorInfo = - VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty)); + VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty)); SDValue Arg = getParamSymbol(DAG, idx, PtrVT); int VecIdx = -1; // Index of the first element of the current vector. @@ -2664,7 +2663,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); auto VectorInfo = VectorizePTXValueVTs( - VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1); + VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1)); // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than // 32-bits are sign extended or zero extended, depending on whether @@ -2716,10 +2715,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Adjust type of load/store op if we've extended the scalar // return value. EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i]; - Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other), - StoreOperands, TheStoreType, - MachinePointerInfo(), /* Align */ 1, - MachineMemOperand::MOStore); + Chain = DAG.getMemIntrinsicNode( + Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType, + MachinePointerInfo(), Align(1), MachineMemOperand::MOStore); // Cleanup vector state. StoreOperands.clear(); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 546fe49808e2..986ad70ed80c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -491,8 +491,7 @@ class NVPTXTargetLowering : public TargetLowering { std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl &, - unsigned retAlignment, - ImmutableCallSite CS) const; + MaybeAlign retAlignment, ImmutableCallSite CS) const; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, @@ -579,8 +578,8 @@ class NVPTXTargetLowering : public TargetLowering { SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty, - unsigned Idx, const DataLayout &DL) const; + Align getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty, + unsigned Idx, const DataLayout &DL) const; }; } // namespace llvm diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 49c2790d7caa..d6e0bc285b3a 100644 --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -60,6 +60,13 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() { createPPCLEDisassembler); } +static DecodeStatus decodeCondBrTarget(MCInst &Inst, unsigned Imm, + uint64_t /*Address*/, + const void * /*Decoder*/) { + Inst.addOperand(MCOperand::createImm(SignExtend32<14>(Imm))); + return MCDisassembler::Success; +} + static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const void 
*Decoder) { diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 523bbe7dd367..a233dc9b3474 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4213,7 +4213,7 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { // because it is translated to r31 or r1 + slot + offset. We won't know the // slot number until the stack frame is finalized. const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); - unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); + unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); if ((SlotAlign % Val) != 0) return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 6f596f8ef867..3223efbdc661 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2417,8 +2417,7 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FrameIdx); - if (Align >= 4) + if (MFI.getObjectAlign(FrameIdx) >= Align(4)) return; PPCFunctionInfo *FuncInfo = MF.getInfo(); @@ -2750,7 +2749,7 @@ SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), None, MachineMemOperand::MOLoad); } @@ -13768,7 +13767,7 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, (Op1VT == MVT::i32 || Op1VT == MVT::i64 || (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); - if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() || + if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() || cast(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) return SDValue(); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 6c48512d8fb6..3102b9089817 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -764,7 +764,9 @@ def PPCCondBrAsmOperand : AsmOperandClass { def condbrtarget : Operand { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getCondBrEncoding"; + let DecoderMethod = "decodeCondBrTarget"; let ParserMatchClass = PPCCondBrAsmOperand; + let OperandType = "OPERAND_PCREL"; } def abscondbrtarget : Operand { let PrintMethod = "printAbsBranchOperand"; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f66d06c20e37..66f943c634fe 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -10,55 +10,18 @@ // //===----------------------------------------------------------------------===// +#include "RISCVISelDAGToDAG.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "RISCV.h" -#include "RISCVTargetMachine.h" #include "Utils/RISCVMatInt.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; #define DEBUG_TYPE "riscv-isel" -// RISCV-specific code to select RISCV machine instructions for -// SelectionDAG operations. 
-namespace { -class RISCVDAGToDAGISel final : public SelectionDAGISel { - const RISCVSubtarget *Subtarget = nullptr; - -public: - explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine) - : SelectionDAGISel(TargetMachine) {} - - StringRef getPassName() const override { - return "RISCV DAG->DAG Pattern Instruction Selection"; - } - - bool runOnMachineFunction(MachineFunction &MF) override { - Subtarget = &MF.getSubtarget(); - return SelectionDAGISel::runOnMachineFunction(MF); - } - - void PostprocessISelDAG() override; - - void Select(SDNode *Node) override; - - bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, - std::vector &OutOps) override; - - bool SelectAddrFI(SDValue Addr, SDValue &Base); - -// Include the pieces autogenerated from the target description. -#include "RISCVGenDAGISel.inc" - -private: - void doPeepholeLoadStoreADDI(); -}; -} - void RISCVDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h new file mode 100644 index 000000000000..dcf733ec3675 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -0,0 +1,56 @@ +//===---- RISCVISelDAGToDAG.h - A dag to dag inst selector for RISCV ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the RISCV target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELDAGTODAG_H +#define LLVM_LIB_TARGET_RISCV_RISCVISELDAGTODAG_H + +#include "RISCV.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +// RISCV-specific code to select RISCV machine instructions for +// SelectionDAG operations. +namespace llvm { +class RISCVDAGToDAGISel : public SelectionDAGISel { + const RISCVSubtarget *Subtarget = nullptr; + +public: + explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine) + : SelectionDAGISel(TargetMachine) {} + + StringRef getPassName() const override { + return "RISCV DAG->DAG Pattern Instruction Selection"; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + void PostprocessISelDAG() override; + + void Select(SDNode *Node) override; + + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) override; + + bool SelectAddrFI(SDValue Addr, SDValue &Base); + +// Include the pieces autogenerated from the target description. 
+#include "RISCVGenDAGISel.inc" + +private: + void doPeepholeLoadStoreADDI(); +}; +} + +#endif diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 929169dd62d9..f76abf22e4db 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -129,6 +129,10 @@ class RISCVTargetLowering : public TargetLowering { return ISD::SIGN_EXTEND; } + ISD::NodeType getExtendForAtomicCmpSwapArg() const override { + return ISD::SIGN_EXTEND; + } + bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override { if (DAG.getMachineFunction().getFunction().hasMinSize()) return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index b4354e852194..b186e32e788d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -79,7 +79,6 @@ template <> bool ConcreteRegion::isLoop() const { return true; } class RegionInfo { const MachineLoopInfo &MLI; const WebAssemblyExceptionInfo &WEI; - std::vector Regions; DenseMap> LoopMap; DenseMap> ExceptionMap; @@ -93,7 +92,14 @@ class RegionInfo { const auto *WE = WEI.getExceptionFor(MBB); if (!ML && !WE) return nullptr; - if ((ML && !WE) || (ML && WE && ML->getNumBlocks() < WE->getNumBlocks())) { + // We determine subregion relationship by domination of their headers, i.e., + // if region A's header dominates region B's header, B is a subregion of A. + // WebAssemblyException contains BBs in all its subregions (loops or + // exceptions), but MachineLoop may not, because MachineLoop does not contain + // BBs that don't have a path to its header even if they are dominated by + // its header. So here we should use WE->contains(ML->getHeader()), but not + // ML->contains(WE->getHeader()). + if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) { // If the smallest region containing MBB is a loop if (LoopMap.count(ML)) return LoopMap[ML].get(); @@ -368,6 +374,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, const Region *Region = RI.getRegionFor(&MBB); if (Region && &MBB == Region->getHeader()) { + // Region header. if (Region->isLoop()) { // Loop header. The loop predecessor should be sorted above, and the // other predecessors should be backedges below. @@ -377,7 +384,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, "Loop header predecessors must be loop predecessors or " "backedges"); } else { - // Not a loop header. All predecessors should be sorted above. + // Exception header. All predecessors should be sorted above. for (auto Pred : MBB.predecessors()) assert(Pred->getNumber() < MBB.getNumber() && "Non-loop-header predecessors should be topologically sorted"); @@ -386,7 +393,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, "Regions should be declared at most once."); } else { - // Not a loop header. All predecessors should be sorted above. + // Not a region header. All predecessors should be sorted above. 
for (auto Pred : MBB.predecessors()) assert(Pred->getNumber() < MBB.getNumber() && "Non-loop-header predecessors should be topologically sorted"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 89265a3a9520..c8878a48b243 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -344,6 +344,21 @@ static std::string getSignature(FunctionType *FTy) { return Sig; } +static void markAsImported(Function *F) { + // Tell the linker that this function is expected to be imported from the + // 'env' module. + if (!F->hasFnAttribute("wasm-import-module")) { + llvm::AttrBuilder B; + B.addAttribute("wasm-import-module", "env"); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + } + if (!F->hasFnAttribute("wasm-import-name")) { + llvm::AttrBuilder B; + B.addAttribute("wasm-import-name", F->getName()); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + } +} + // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. // This is because a landingpad instruction contains two more arguments, a // personality function and a cleanup bit, and __cxa_find_matching_catch_N @@ -360,6 +375,7 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, Function *F = Function::Create( FTy, GlobalValue::ExternalLinkage, "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M); + markAsImported(F); FindMatchingCatches[NumClauses] = F; return F; } @@ -469,6 +485,7 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { CalleeFTy->isVarArg()); Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, "__invoke_" + Sig, M); + markAsImported(F); InvokeWrappers[Sig] = F; return F; } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 9a832c1bb16d..c2eb78bd056d 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -136,9 +136,11 @@ class X86AsmBackend : public MCAsmBackend { bool needAlign(MCObjectStreamer &OS) const; bool needAlignInst(const MCInst &Inst) const; + bool allowAutoPaddingForInst(const MCInst &Inst, MCObjectStreamer &OS) const; MCInst PrevInst; MCBoundaryAlignFragment *PendingBoundaryAlign = nullptr; std::pair PrevInstPosition; + bool AllowAutoPaddingForInst; public: X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) @@ -538,13 +540,8 @@ static size_t getSizeForInstFragment(const MCFragment *F) { } } -/// Check if the instruction operand needs to be aligned. Padding is disabled -/// before intruction which may be rewritten by linker(e.g. TLSCALL). +/// Check if the instruction operand needs to be aligned. bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { - // Linker may rewrite the instruction with variant symbol operand. - if (hasVariantSymbol(Inst)) - return false; - const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode()); return (InstDesc.isConditionalBranch() && (AlignBranchType & X86::AlignBranchJcc)) || @@ -558,31 +555,53 @@ bool X86AsmBackend::needAlignInst(const MCInst &Inst) const { (AlignBranchType & X86::AlignBranchIndirect)); } -/// Insert BoundaryAlignFragment before instructions to align branches. 
-void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
-                                         const MCInst &Inst) {
-  if (!needAlign(OS))
-    return;
+/// Return true if we can insert a NOP or prefixes automatically before the
+/// instruction to be emitted.
+bool X86AsmBackend::allowAutoPaddingForInst(const MCInst &Inst,
+                                            MCObjectStreamer &OS) const {
+  if (hasVariantSymbol(Inst))
+    // Linker may rewrite the instruction with variant symbol operand (e.g.
+    // TLSCALL).
+    return false;
 
   if (hasInterruptDelaySlot(PrevInst))
     // If this instruction follows an interrupt enabling instruction with a one
     // instruction delay, inserting a nop would change behavior.
-    return;
+    return false;
 
   if (isPrefix(PrevInst, *MCII))
-    // If this instruction follows a prefix, inserting a nop would change
+    // If this instruction follows a prefix, inserting a nop/prefix would change
     // semantics.
-    return;
+    return false;
+
+  if (isPrefix(Inst, *MCII))
+    // If this instruction is a prefix, inserting a prefix would change
+    // semantics.
+    return false;
 
   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
     // If this instruction follows any data, there is no clear
-    // instruction boundary, inserting a nop would change semantics.
+    // instruction boundary, inserting a nop/prefix would change semantics.
+    return false;
+
+  return true;
+}
+
+/// Insert BoundaryAlignFragment before instructions to align branches.
+void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
+                                         const MCInst &Inst) {
+  AllowAutoPaddingForInst = allowAutoPaddingForInst(Inst, OS);
+
+  if (!needAlign(OS))
     return;
 
   if (!isMacroFused(PrevInst, Inst))
     // Macro fusion doesn't actually happen; clear the pending fragment.
     PendingBoundaryAlign = nullptr;
 
+  if (!AllowAutoPaddingForInst)
+    return;
+
   if (PendingBoundaryAlign &&
       OS.getCurrentFragment()->getPrevNode() == PendingBoundaryAlign) {
     // Macro fusion actually happens and there is no other fragment inserted
@@ -617,12 +636,14 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                        const MCInst &Inst) {
-  if (!needAlign(OS))
-    return;
-
   PrevInst = Inst;
   MCFragment *CF = OS.getCurrentFragment();
   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
+  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
+    F->setAllowAutoPadding(AllowAutoPaddingForInst);
+
+  if (!needAlign(OS))
+    return;
 
   if (!needAlignInst(Inst) || !PendingBoundaryAlign)
     return;
@@ -827,12 +848,6 @@ static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
 }
 
-
-static bool shouldAddPrefix(const MCInst &Inst, const MCInstrInfo &MCII) {
-  // Linker may rewrite the instruction with variant symbol operand.
-  return !hasVariantSymbol(Inst);
-}
-
 static unsigned getRemainingPrefixSize(const MCInst &Inst,
                                        const MCSubtargetInfo &STI,
                                        MCCodeEmitter &Emitter) {
@@ -856,7 +871,7 @@ static unsigned getRemainingPrefixSize(const MCInst &Inst,
 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                             MCCodeEmitter &Emitter,
                                             unsigned &RemainingSize) const {
-  if (!shouldAddPrefix(RF.getInst(), *MCII))
+  if (!RF.getAllowAutoPadding())
     return false;
   // If the instruction isn't fully relaxed, shifting it around might require a
   // larger value for one of the fixups than can be encoded.
The outer loop diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index f394fa403a1c..0e8e94e311d4 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2012,7 +2012,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // Skip the saved EBP. return Offset + SlotSize + FPDelta; } else { - assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0); + assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); return Offset + StackSize; } } else if (TRI->needsStackRealignment(MF)) { @@ -2020,7 +2020,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // Skip the saved EBP. return Offset + SlotSize + FPDelta; } else { - assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0); + assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); return Offset + StackSize; } // FIXME: Support tail calls @@ -3203,7 +3203,7 @@ struct X86FrameSortingObject { bool IsValid = false; // true if we care about this Object. unsigned ObjectIndex = 0; // Index of Object into MFI list. unsigned ObjectSize = 0; // Size of Object in bytes. - unsigned ObjectAlignment = 1; // Alignment of Object in bytes. + Align ObjectAlignment = Align(1); // Alignment of Object in bytes. unsigned ObjectNumUses = 0; // Object static number of uses. }; @@ -3288,7 +3288,7 @@ void X86FrameLowering::orderFrameObjects( for (auto &Obj : ObjectsToAllocate) { SortingObjects[Obj].IsValid = true; SortingObjects[Obj].ObjectIndex = Obj; - SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlignment(Obj); + SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj); // Set the size. int ObjectSize = MFI.getObjectSize(Obj); if (ObjectSize == 0) @@ -3381,7 +3381,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized( int FrameIndex = H.CatchObj.FrameIndex; if (FrameIndex != INT_MAX) { // Ensure alignment. 
- unsigned Align = MFI.getObjectAlignment(FrameIndex); + unsigned Align = MFI.getObjectAlign(FrameIndex).value(); MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align; MinFixedObjOffset -= MFI.getObjectSize(FrameIndex); MFI.setObjectOffset(FrameIndex, MinFixedObjOffset); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index e6116d644f97..2f3d6e809037 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1185,7 +1185,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDVTList VTs = CurDAG->getVTList(MVT::Other); SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp}; Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT, - MPI, /*Align*/ 0, + MPI, /*Align*/ None, MachineMemOperand::MOStore); if (N->getFlags().hasNoFPExcept()) { SDNodeFlags Flags = Store->getFlags(); @@ -1201,9 +1201,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (!DstIsSSE) { SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other); SDValue Ops[] = {Store, MemTmp}; - Result = - CurDAG->getMemIntrinsicNode(X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, - /*Align*/ 0, MachineMemOperand::MOLoad); + Result = CurDAG->getMemIntrinsicNode( + X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, + /*Align*/ None, MachineMemOperand::MOLoad); if (N->getFlags().hasNoFPExcept()) { SDNodeFlags Flags = Result->getFlags(); Flags.setNoFPExcept(true); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 45cdfa9450d0..eba4db960f51 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6133,6 +6133,35 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { return SDValue(); } +void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl &Mask, + bool Lo, bool Unary) { + assert(Mask.empty() && "Expected an empty shuffle mask vector"); + int NumElts = VT.getVectorNumElements(); + int NumEltsInLane = 128 / VT.getScalarSizeInBits(); + for (int i = 0; i < NumElts; ++i) { + unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; + int Pos = (i % NumEltsInLane) / 2 + LaneStart; + Pos += (Unary ? 0 : NumElts * (i % 2)); + Pos += (Lo ? 0 : NumEltsInLane / 2); + Mask.push_back(Pos); + } +} + +/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation +/// imposed by AVX and specific to the unary pattern. Example: +/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> +/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> +void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl &Mask, + bool Lo) { + assert(Mask.empty() && "Expected an empty shuffle mask vector"); + int NumElts = VT.getVectorNumElements(); + for (int i = 0; i < NumElts; ++i) { + int Pos = i / 2; + Pos += (Lo ? 0 : NumElts / 2); + Mask.push_back(Pos); + } +} + /// Returns a vector_shuffle node for an unpackl operation. 
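/// For instance (an illustrative note, not part of the patch): with
/// VT == v8i16, createUnpackShuffleMask produces
///   Lo, binary: <0, 8, 1, 9, 2, 10, 3, 11>  (interleave the low halves)
///   Lo, unary:  <0, 0, 1, 1, 2, 2, 3, 3>    (matches the splat2 pattern
///                                            above for single-lane types)
/// which is exactly the unpacklo element order the helper below produces.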
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, SDValue V2) { @@ -7320,8 +7349,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); SmallVector Mask0, Mask1; - scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0); - scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1); + scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0); + scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1); for (size_t i = 0; i != MaskSize; ++i) { if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef) Mask.push_back(SM_SentinelUndef); @@ -7379,7 +7408,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, if ((NumSubElts % SubMask.size()) == 0) { int Scale = NumSubElts / SubMask.size(); SmallVector ScaledSubMask; - scaleShuffleMask(Scale, SubMask, ScaledSubMask); + scaleShuffleMask(Scale, SubMask, ScaledSubMask); SubMask = ScaledSubMask; } else { int Scale = SubMask.size() / NumSubElts; @@ -7522,7 +7551,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, case ISD::TRUNCATE: case X86ISD::VTRUNC: { SDValue Src = N.getOperand(0); - MVT SrcVT = Src.getSimpleValueType(); + EVT SrcVT = Src.getValueType(); + // Truncated source must be a simple vector. + if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 || + (SrcVT.getScalarSizeInBits() % 8) != 0) + return false; unsigned NumSrcElts = SrcVT.getVectorNumElements(); unsigned NumBitsPerSrcElt = SrcVT.getScalarSizeInBits(); unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt; @@ -8100,11 +8133,11 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, // FIXME: 256-bit vector instructions don't require a strict alignment, // improve this code to support it better. - unsigned RequiredAlign = VT.getSizeInBits()/8; + Align RequiredAlign(VT.getSizeInBits() / 8); SDValue Chain = LD->getChain(); // Make sure the stack object alignment is at least 16 or 32. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) { + if (DAG.InferPtrAlign(Ptr) < RequiredAlign) { if (MFI.isFixedObjectIndex(FI)) { // Can't change the alignment. FIXME: It's possible to compute // the exact stack offset and reference FI + adjust offset instead. @@ -8119,9 +8152,9 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, // Ptr + (Offset & ~15). 
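// Worked numbers for the rounding below (a hedged illustration, taking
// RequiredAlign == Align(16)): Offset 20 passes the check since
// (20 % 16) & 3 == 0, and StartOffset becomes 20 & ~15 == 16, so the load is
// rebased to Ptr + 16 with 4 bytes of intra-chunk offset; Offset 22 fails
// ((22 % 16) & 3 == 2) and we give up on this transform.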
if (Offset < 0) return SDValue(); - if ((Offset % RequiredAlign) & 3) + if ((Offset % RequiredAlign.value()) & 3) return SDValue(); - int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1); + int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1); if (StartOffset) { SDLoc DL(Ptr); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -8390,11 +8423,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, if (TLI.isTypeLegal(VecVT)) { SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; - SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, - LDBase->getPointerInfo(), - LDBase->getAlignment(), - MachineMemOperand::MOLoad); + SDValue ResNode = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), + LDBase->getAlign(), MachineMemOperand::MOLoad); for (auto *LD : Loads) if (LD) DAG.makeEquivalentMemoryOrdering(LD, ResNode); @@ -8636,7 +8667,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; - unsigned Alignment = cast(CP)->getAlignment(); + MaybeAlign Alignment(cast(CP)->getAlignment()); SDVTList Tys = DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other); SDValue Ops[] = {DAG.getEntryNode(), CP}; @@ -8724,7 +8755,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast(CP)->getAlignment(); + MaybeAlign Alignment(cast(CP)->getAlignment()); SDVTList Tys = DAG.getVTList(VT, MVT::Other); SDValue Ops[] = {DAG.getEntryNode(), CP}; @@ -11297,20 +11328,21 @@ static int canLowerByDroppingEvenElements(ArrayRef Mask, // X86 has dedicated pack instructions that can handle specific truncation // operations: PACKSS and PACKUS. +// TODO: Add support for matching multiple PACKSS/PACKUS stages. 
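// A sketch of what the matcher below accepts (illustrative, not exhaustive):
// truncating v8i16 -> v16i8 via PACKUSWB requires the top 8 bits of every
// i16 lane to be known zero (the MaskedValueIsZero path), while PACKSSWB
// instead requires more than 8 sign bits per lane (the ComputeNumSignBits
// path), i.e. each value must already be an in-range signed i8.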
static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, unsigned &PackOpcode, ArrayRef TargetMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { unsigned NumElts = VT.getVectorNumElements(); unsigned BitSize = VT.getScalarSizeInBits(); - MVT PackSVT = MVT::getIntegerVT(BitSize * 2); - MVT PackVT = MVT::getVectorVT(PackSVT, NumElts / 2); - auto MatchPACK = [&](SDValue N1, SDValue N2) { + auto MatchPACK = [&](SDValue N1, SDValue N2, MVT PackVT) { + unsigned NumSrcBits = PackVT.getScalarSizeInBits(); + unsigned NumPackedBits = NumSrcBits - BitSize; SDValue VV1 = DAG.getBitcast(PackVT, N1); SDValue VV2 = DAG.getBitcast(PackVT, N2); - if (Subtarget.hasSSE41() || PackSVT == MVT::i16) { - APInt ZeroMask = APInt::getHighBitsSet(BitSize * 2, BitSize); + if (Subtarget.hasSSE41() || BitSize == 8) { + APInt ZeroMask = APInt::getHighBitsSet(NumSrcBits, NumPackedBits); if ((N1.isUndef() || DAG.MaskedValueIsZero(VV1, ZeroMask)) && (N2.isUndef() || DAG.MaskedValueIsZero(VV2, ZeroMask))) { V1 = VV1; @@ -11320,8 +11352,8 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, return true; } } - if ((N1.isUndef() || DAG.ComputeNumSignBits(VV1) > BitSize) && - (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > BitSize)) { + if ((N1.isUndef() || DAG.ComputeNumSignBits(VV1) > NumPackedBits) && + (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > NumPackedBits)) { V1 = VV1; V2 = VV2; SrcVT = PackVT; @@ -11331,18 +11363,21 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, return false; }; + MVT PackSVT = MVT::getIntegerVT(BitSize * 2); + MVT PackVT = MVT::getVectorVT(PackSVT, NumElts / 2); + // Try binary shuffle. SmallVector BinaryMask; createPackShuffleMask(VT, BinaryMask, false); if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2)) - if (MatchPACK(V1, V2)) + if (MatchPACK(V1, V2, PackVT)) return true; // Try unary shuffle. SmallVector UnaryMask; createPackShuffleMask(VT, UnaryMask, true); if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1)) - if (MatchPACK(V1, V1)) + if (MatchPACK(V1, V1, PackVT)) return true; return false; @@ -16279,7 +16314,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) { SmallVector PSHUFDMask; - scaleShuffleMask(2, RepeatedMask, PSHUFDMask); + scaleShuffleMask(2, RepeatedMask, PSHUFDMask); return DAG.getBitcast( MVT::v4i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, @@ -16928,7 +16963,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef Mask, SmallVector Widened256Mask; if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) { Widened128Mask.clear(); - llvm::scaleShuffleMask(2, Widened256Mask, Widened128Mask); + llvm::scaleShuffleMask(2, Widened256Mask, Widened128Mask); } // Try to lower to vshuf64x2/vshuf32x4. @@ -17079,7 +17114,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef Mask, SmallVector Repeated128Mask; if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) { SmallVector PSHUFDMask; - scaleShuffleMask(2, Repeated128Mask, PSHUFDMask); + scaleShuffleMask(2, Repeated128Mask, PSHUFDMask); return DAG.getBitcast( MVT::v8i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, @@ -17216,6 +17251,11 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef Mask, if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG)) return V; + // Use dedicated pack instructions for masks that match their pattern. 
+ if (SDValue V = + lowerShuffleWithPACK(DL, MVT::v32i16, Mask, V1, V2, DAG, Subtarget)) + return V; + // Try to use shift instructions. if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -17237,13 +17277,13 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef Mask, // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v32 case. - return lowerV8I16GeneralSingleInputShuffle( - DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG); + return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1, + RepeatedMask, Subtarget, DAG); } } if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2, @@ -19214,15 +19254,16 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); unsigned Size = SrcVT.getStoreSize(); + Align Alignment(Size); MachineFunction &MF = DAG.getMachineFunction(); auto PtrVT = getPointerTy(MF.getDataLayout()); - int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false); + int SSFI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Size); + Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment); std::pair Tmp = - BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Size, DAG); + BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG); if (IsStrict) return DAG.getMergeValues({Tmp.first, Tmp.second}, dl); @@ -19232,7 +19273,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, std::pair X86TargetLowering::BuildFILD( EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer, - MachinePointerInfo PtrInfo, unsigned Alignment, SelectionDAG &DAG) const { + MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const { // Build the FILD SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(DstVT); @@ -19525,8 +19566,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; SDValue VBias = DAG.getMemIntrinsicNode( X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - /*Alignment*/ 8, MachineMemOperand::MOLoad); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(8), + MachineMemOperand::MOLoad); SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn, DAG.getBitcast(MVT::v4i64, VBias)); @@ -19705,7 +19746,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32), OffsetSlot, MPI.getWithOffset(4), 4); std::pair Tmp = - BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, 8, DAG); + BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, Align(8), DAG); if (IsStrict) return DAG.getMergeValues({Tmp.first, Tmp.second}, dl); @@ -19721,7 +19762,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); } SDValue Store = - DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, 8 /*Align*/); + DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Align(8)); // For i64 source, we 
need to add the appropriate power of 2 if the input // was negative. This is the same as the optimization in // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, @@ -19729,9 +19770,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // in SSE. (The generic code can't know it's OK to do this, or how to.) SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot }; - SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, - MVT::i64, MPI, 8 /*Align*/, - MachineMemOperand::MOLoad); + SDValue Fild = + DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI, + Align(8), MachineMemOperand::MOLoad); Chain = Fild.getValue(1); @@ -20161,7 +20202,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, // Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits. SmallVector Mask; int Scale = 64 / OutVT.getScalarSizeInBits(); - scaleShuffleMask(Scale, ArrayRef({ 0, 2, 1, 3 }), Mask); + scaleShuffleMask(Scale, { 0, 2, 1, 3 }, Mask); Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask); if (DstVT.is256BitVector()) @@ -20682,14 +20723,13 @@ SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N, SDValue Ops[] = { Chain, StackPtr }; Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI, - /*Align*/0, MachineMemOperand::MOLoad); + /*Align*/ None, MachineMemOperand::MOLoad); Chain = Src.getValue(1); } SDValue StoreOps[] = { Chain, Src, StackPtr }; - Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, - DAG.getVTList(MVT::Other), StoreOps, - DstVT, MPI, /*Align*/0, + Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other), + StoreOps, DstVT, MPI, /*Align*/ None, MachineMemOperand::MOStore); return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI); @@ -23589,11 +23629,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(Align, dl, MVT::i32)}; SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode( - X86ISD::VAARG_64, dl, - VTs, InstOps, MVT::i64, - MachinePointerInfo(SV), - /*Align=*/0, - MachineMemOperand::MOLoad | MachineMemOperand::MOStore); + X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), + /*Align=*/None, MachineMemOperand::MOLoad | MachineMemOperand::MOStore); Chain = VAARG.getValue(1); // Load the next argument and return it @@ -25775,10 +25812,10 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue Ops[] = {Chain, StackSlot}; Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI, - 2 /*Align*/, MachineMemOperand::MOStore); + Align(2), MachineMemOperand::MOStore); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, 2 /*Align*/); + SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, Align(2)); Chain = CWD.getValue(1); // Mask and turn the control bits into a shift for the lookup table. @@ -28453,7 +28490,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, SDValue LdOps[] = {Chain, StackPtr}; SDValue Value = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI, - /*Align*/ 0, MachineMemOperand::MOLoad); + /*Align*/ None, MachineMemOperand::MOLoad); Chain = Value.getValue(1); // Now use an FIST to do the atomic store. 
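      // (A hedged aside on why this is atomic: FILD m64 / FISTP m64 each move
      // the full 64 bits through the x87 stack with a single memory access,
      // so an aligned i64 store can be done atomically on 32-bit x86 without
      // SSE.)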
@@ -29864,10 +29901,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); SDValue StoreOps[] = { Chain, Result, StackPtr }; - Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl, - DAG.getVTList(MVT::Other), StoreOps, - MVT::i64, MPI, 0 /*Align*/, - MachineMemOperand::MOStore); + Chain = DAG.getMemIntrinsicNode( + X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, + MPI, None /*Align*/, MachineMemOperand::MOStore); // Finally load the value back from the stack temporary and return it. // This load is not atomic and doesn't need to be. @@ -33607,7 +33643,7 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef Mask, // Narrow the repeated mask to create 32-bit element permutes. SmallVector WordMask = RepeatedMask; if (MaskScalarSizeInBits == 64) - scaleShuffleMask(2, RepeatedMask, WordMask); + scaleShuffleMask(2, RepeatedMask, WordMask); Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI); ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32); @@ -34060,7 +34096,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (BaseMaskEltSizeInBits > 64) { assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size"); int MaskScale = BaseMaskEltSizeInBits / 64; - scaleShuffleMask(MaskScale, BaseMask, Mask); + scaleShuffleMask(MaskScale, BaseMask, Mask); } else { Mask = SmallVector(BaseMask.begin(), BaseMask.end()); } @@ -35287,11 +35323,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(MVT::v2f64, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue VZLoad = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::f64, - LN->getPointerInfo(), - LN->getAlignment(), - LN->getMemOperand()->getFlags()); + SDValue VZLoad = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::f64, LN->getPointerInfo(), + LN->getAlign(), LN->getMemOperand()->getFlags()); SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad); DCI.CombineTo(N.getNode(), Movddup); DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); @@ -35389,11 +35423,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(VT, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue BcastLd = - DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, - MVT::i16, LN->getPointerInfo(), - LN->getAlignment(), - LN->getMemOperand()->getFlags()); + SDValue BcastLd = DAG.getMemIntrinsicNode( + X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16, + LN->getPointerInfo(), LN->getAlign(), + LN->getMemOperand()->getFlags()); DCI.CombineTo(N.getNode(), BcastLd); DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); DCI.recursivelyDeleteUnusedNodes(LN); @@ -35434,12 +35467,11 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, SDVTList Tys = DAG.getVTList(VT, MVT::Other); SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(), Offset, DL); SDValue Ops[] = { LN->getChain(), Ptr }; - SDValue BcastLd = - DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, - MVT::i16, - LN->getPointerInfo().getWithOffset(Offset), - MinAlign(LN->getAlignment(), Offset), - LN->getMemOperand()->getFlags()); + SDValue BcastLd = DAG.getMemIntrinsicNode( + X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16, + LN->getPointerInfo().getWithOffset(Offset), + 
commonAlignment(LN->getAlign(), Offset), + LN->getMemOperand()->getFlags()); DCI.CombineTo(N.getNode(), BcastLd); DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); DCI.recursivelyDeleteUnusedNodes(LN); @@ -35472,11 +35504,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(VT, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue BcastLd = - DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, - MVT::f64, LN->getPointerInfo(), - LN->getAlignment(), - LN->getMemOperand()->getFlags()); + SDValue BcastLd = DAG.getMemIntrinsicNode( + X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, + LN->getPointerInfo(), LN->getAlign(), + LN->getMemOperand()->getFlags()); DCI.CombineTo(N.getNode(), BcastLd); DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); DCI.recursivelyDeleteUnusedNodes(LN); @@ -36237,12 +36268,10 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(VT, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue VZLoad = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, - VT.getVectorElementType(), - LN->getPointerInfo(), - LN->getAlignment(), - LN->getMemOperand()->getFlags()); + SDValue VZLoad = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, dl, Tys, Ops, VT.getVectorElementType(), + LN->getPointerInfo(), LN->getAlign(), + LN->getMemOperand()->getFlags()); DCI.CombineTo(N, VZLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); DCI.recursivelyDeleteUnusedNodes(LN); @@ -38184,7 +38213,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if ((NumSrcElts % Mask.size()) == 0) { SmallVector ScaledMask; int Scale = NumSrcElts / Mask.size(); - scaleShuffleMask(Scale, Mask, ScaledMask); + scaleShuffleMask(Scale, Mask, ScaledMask); Mask = std::move(ScaledMask); } else if ((Mask.size() % NumSrcElts) == 0) { // Simplify Mask based on demanded element. @@ -39752,6 +39781,81 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) { return SDValue(); } +/// If we are inverting an PTEST/TESTP operand, attempt to adjust the CC +/// to avoid the inversion. +static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC, + SelectionDAG &DAG) { + // TODO: Handle X86ISD::KTEST/X86ISD::KORTEST. + if (EFLAGS.getOpcode() != X86ISD::PTEST && + EFLAGS.getOpcode() != X86ISD::TESTP) + return SDValue(); + + // PTEST/TESTP sets EFLAGS as: + // TESTZ: ZF = (Op0 & Op1) == 0 + // TESTC: CF = (~Op0 & Op1) == 0 + // TESTNZC: ZF == 0 && CF == 0 + EVT VT = EFLAGS.getValueType(); + SDValue Op0 = EFLAGS.getOperand(0); + SDValue Op1 = EFLAGS.getOperand(1); + EVT OpVT = Op0.getValueType(); + + // TEST*(~X,Y) == TEST*(X,Y) + if (SDValue NotOp0 = IsNOT(Op0, DAG)) { + X86::CondCode InvCC; + switch (CC) { + case X86::COND_B: + // testc -> testz. + InvCC = X86::COND_E; + break; + case X86::COND_AE: + // !testc -> !testz. + InvCC = X86::COND_NE; + break; + case X86::COND_E: + // testz -> testc. + InvCC = X86::COND_B; + break; + case X86::COND_NE: + // !testz -> !testc. + InvCC = X86::COND_AE; + break; + case X86::COND_A: + case X86::COND_BE: + // testnzc -> testnzc (no change). 
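The CC table above encodes flag identities that are easy to sanity-check outside SelectionDAG. Below is a brute-force scalar model of the EFLAGS semantics quoted in the comment; the 64-bit AND stands in for the vector-wide reduction PTEST/TESTP perform, and the helper names are illustrative only:

#include <cassert>
#include <cstdint>

// Scalar model of how PTEST/TESTP set EFLAGS:
//   ZF = (Op0 & Op1) == 0   ("testz",  read by COND_E / COND_NE)
//   CF = (~Op0 & Op1) == 0  ("testc",  read by COND_B / COND_AE)
static bool ZF(uint64_t A, uint64_t B) { return (A & B) == 0; }
static bool CF(uint64_t A, uint64_t B) { return (~A & B) == 0; }

int main() {
  for (uint64_t A = 0; A < 64; ++A) {
    for (uint64_t B = 0; B < 64; ++B) {
      // PTEST(~A, B): testz becomes testc of (A, B) and vice versa, which
      // is why the combine swaps COND_E <-> COND_B and COND_NE <-> COND_AE.
      assert(ZF(~A, B) == CF(A, B));
      assert(CF(~A, B) == ZF(A, B));
      // TESTZ(A, ~B) == TESTC(B, A).
      assert(ZF(A, ~B) == CF(B, A));
      // TESTZ(-1, B) == TESTZ(B, B) and TESTZ(A, -1) == TESTZ(A, A).
      assert(ZF(~uint64_t(0), B) == ZF(B, B));
      assert(ZF(A, ~uint64_t(0)) == ZF(A, A));
    }
  }
  return 0;
}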
+ InvCC = CC; + break; + default: + InvCC = X86::COND_INVALID; + break; + } + + if (InvCC != X86::COND_INVALID) { + CC = InvCC; + return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, + DAG.getBitcast(OpVT, NotOp0), Op1); + } + } + + if (CC == X86::COND_E || CC == X86::COND_NE) { + // TESTZ(X,~Y) == TESTC(Y,X) + if (SDValue NotOp1 = IsNOT(Op1, DAG)) { + CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE); + return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, + DAG.getBitcast(OpVT, NotOp1), Op0); + } + + // TESTZ(-1,X) == TESTZ(X,X) + if (ISD::isBuildVectorAllOnes(Op0.getNode())) + return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1); + + // TESTZ(X,-1) == TESTZ(X,X) + if (ISD::isBuildVectorAllOnes(Op1.getNode())) + return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0); + } + + return SDValue(); +} + /// Optimize an EFLAGS definition used according to the condition code \p CC /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing /// uses of chain values. @@ -39764,6 +39868,10 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC)) return R; + + if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG)) + return R; + return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget); } @@ -44173,11 +44281,9 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits); SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue VZLoad = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT, - LN->getPointerInfo(), - LN->getAlignment(), - LN->getMemOperand()->getFlags()); + SDValue VZLoad = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT, LN->getPointerInfo(), + LN->getAlign(), LN->getMemOperand()->getFlags()); SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad)); DCI.CombineTo(N, Convert); @@ -44209,11 +44315,9 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits); SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue VZLoad = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT, - LN->getPointerInfo(), - LN->getAlignment(), - LN->getMemOperand()->getFlags()); + SDValue VZLoad = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT, LN->getPointerInfo(), + LN->getAlign(), LN->getMemOperand()->getFlags()); SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad)); DCI.CombineTo(N, Convert); @@ -44298,11 +44402,9 @@ static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue VZLoad = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MVT::i64, - LN->getPointerInfo(), - LN->getAlignment(), - LN->getMemOperand()->getFlags()); + SDValue VZLoad = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, dl, Tys, Ops, MVT::i64, LN->getPointerInfo(), + LN->getAlign(), LN->getMemOperand()->getFlags()); SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32, DAG.getBitcast(MVT::v8i16, VZLoad)); DCI.CombineTo(N, Convert); @@ -45546,7 +45648,7 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, std::pair Tmp = Subtarget.getTargetLowering()->BuildFILD( VT, InVT, SDLoc(N), Ld->getChain(), 
Ld->getBasePtr(), - Ld->getPointerInfo(), Ld->getAlignment(), DAG); + Ld->getPointerInfo(), Ld->getAlign(), DAG); DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Tmp.second); return Tmp.first; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index c9fea553e282..5b18c09f9cf2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1215,7 +1215,7 @@ namespace llvm { std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer, MachinePointerInfo PtrInfo, - unsigned Align, + Align Alignment, SelectionDAG &DAG) const; bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; @@ -1564,35 +1564,14 @@ namespace llvm { }; /// Generate unpacklo/unpackhi shuffle mask. - template <typename T> - void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo, - bool Unary) { - assert(Mask.empty() && "Expected an empty shuffle mask vector"); - int NumElts = VT.getVectorNumElements(); - int NumEltsInLane = 128 / VT.getScalarSizeInBits(); - for (int i = 0; i < NumElts; ++i) { - unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; - int Pos = (i % NumEltsInLane) / 2 + LaneStart; - Pos += (Unary ? 0 : NumElts * (i % 2)); - Pos += (Lo ? 0 : NumEltsInLane / 2); - Mask.push_back(Pos); - } - } + void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo, + bool Unary); /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation /// imposed by AVX and specific to the unary pattern. Example: /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> - template <typename T> - void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo) { - assert(Mask.empty() && "Expected an empty shuffle mask vector"); - int NumElts = VT.getVectorNumElements(); - for (int i = 0; i < NumElts; ++i) { - int Pos = i / 2; - Pos += (Lo ? 
0 : NumElts / 2); - Mask.push_back(Pos); - } - } + void createSplat2ShuffleMask(MVT VT, SmallVectorImpl &Mask, bool Lo); } // end namespace llvm diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp index f0288adf52ce..8c3b18505157 100644 --- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp +++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp @@ -325,19 +325,19 @@ void X86InterleavedAccessGroup::interleave8bitStride4VF8( MVT VT = MVT::v8i16; TransposedMatrix.resize(2); - SmallVector MaskLow; - SmallVector MaskLowTemp1, MaskLowWord; - SmallVector MaskHighTemp1, MaskHighWord; + SmallVector MaskLow; + SmallVector MaskLowTemp1, MaskLowWord; + SmallVector MaskHighTemp1, MaskHighWord; for (unsigned i = 0; i < 8; ++i) { MaskLow.push_back(i); MaskLow.push_back(i + 8); } - createUnpackShuffleMask(VT, MaskLowTemp1, true, false); - createUnpackShuffleMask(VT, MaskHighTemp1, false, false); - scaleShuffleMask(2, MaskHighTemp1, MaskHighWord); - scaleShuffleMask(2, MaskLowTemp1, MaskLowWord); + createUnpackShuffleMask(VT, MaskLowTemp1, true, false); + createUnpackShuffleMask(VT, MaskHighTemp1, false, false); + scaleShuffleMask(2, MaskHighTemp1, MaskHighWord); + scaleShuffleMask(2, MaskLowTemp1, MaskLowWord); // IntrVec1Low = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7 // IntrVec2Low = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7 Value *IntrVec1Low = @@ -367,25 +367,25 @@ void X86InterleavedAccessGroup::interleave8bitStride4( MVT HalfVT = scaleVectorType(VT); TransposedMatrix.resize(4); - SmallVector MaskHigh; - SmallVector MaskLow; - SmallVector LowHighMask[2]; - SmallVector MaskHighTemp; - SmallVector MaskLowTemp; + SmallVector MaskHigh; + SmallVector MaskLow; + SmallVector LowHighMask[2]; + SmallVector MaskHighTemp; + SmallVector MaskLowTemp; // MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86 // shuffle pattern. - createUnpackShuffleMask(VT, MaskLow, true, false); - createUnpackShuffleMask(VT, MaskHigh, false, false); + createUnpackShuffleMask(VT, MaskLow, true, false); + createUnpackShuffleMask(VT, MaskHigh, false, false); // MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86 // shuffle pattern. - createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false); - createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false); - scaleShuffleMask(2, MaskLowTemp, LowHighMask[0]); - scaleShuffleMask(2, MaskHighTemp, LowHighMask[1]); + createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false); + createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false); + scaleShuffleMask(2, MaskLowTemp, LowHighMask[0]); + scaleShuffleMask(2, MaskHighTemp, LowHighMask[1]); // IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23 // IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... 
c31 m31 diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 02fff40261d1..1e5591fd8d18 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -5030,6 +5030,7 @@ struct AAHeapToStackImpl : public AAHeapToStack { LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall << "\n"); + MaybeAlign Alignment; Constant *Size; if (isCallocLikeFn(MallocCall, TLI)) { auto *Num = cast(MallocCall->getOperand(0)); @@ -5037,13 +5038,19 @@ struct AAHeapToStackImpl : public AAHeapToStack { APInt TotalSize = SizeT->getValue() * Num->getValue(); Size = ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize); + } else if (isAlignedAllocLikeFn(MallocCall, TLI)) { + Size = cast(MallocCall->getOperand(1)); + Alignment = MaybeAlign(cast(MallocCall->getOperand(0)) + ->getValue() + .getZExtValue()); } else { Size = cast(MallocCall->getOperand(0)); } unsigned AS = cast(MallocCall->getType())->getAddressSpace(); - Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, - Size, "", MallocCall->getNextNode()); + Instruction *AI = + new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment, + "", MallocCall->getNextNode()); if (AI->getType() != MallocCall->getType()) AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc", @@ -5175,8 +5182,9 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { return true; bool IsMalloc = isMallocLikeFn(&I, TLI); + bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI); bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI); - if (!IsMalloc && !IsCalloc) { + if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) { BadMallocCalls.insert(&I); return true; } @@ -5188,6 +5196,14 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { MallocCalls.insert(&I); return true; } + } else if (IsAlignedAllocLike && isa(I.getOperand(0))) { + // Only if the alignment and sizes are constant. + if (auto *Size = dyn_cast(I.getOperand(1))) + if (Size->getValue().ule(MaxHeapToStackSize)) + if (UsesCheck(I) || FreeCheck(I)) { + MallocCalls.insert(&I); + return true; + } } else if (IsCalloc) { bool Overflow = false; if (auto *Num = dyn_cast(I.getOperand(0))) @@ -5219,8 +5235,9 @@ struct AAHeapToStackFunction final : public AAHeapToStackImpl { /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override { - STATS_DECL(MallocCalls, Function, - "Number of malloc calls converted to allocas"); + STATS_DECL( + MallocCalls, Function, + "Number of malloc/calloc/aligned_alloc calls converted to allocas"); for (auto *C : MallocCalls) if (!BadMallocCalls.count(C)) ++BUILD_STAT_NAME(MallocCalls, Function); @@ -7292,6 +7309,16 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating { /// Attributor /// ---------------------------------------------------------------------------- +Attributor::~Attributor() { + // The abstract attributes are allocated via the BumpPtrAllocator Allocator, + // thus we cannot delete them. We can, and want to, destruct them though. 
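The destructor above separates destruction from deallocation: objects placed in a BumpPtrAllocator are freed wholesale along with the allocator's pages, so delete is never legal on them, but their destructors still have to run explicitly. A self-contained sketch of that ownership pattern in plain C++ (toy types, not the Attributor classes):

#include <iostream>
#include <new>
#include <vector>

// Toy version of the bump-allocator pattern: objects are placement-new'ed
// into a flat buffer, so `delete` is illegal (the memory never came from
// `new`), but destructors must still be invoked by hand.
struct Tracked {
  int Id;
  explicit Tracked(int Id) : Id(Id) {}
  ~Tracked() { std::cout << "destroying " << Id << "\n"; }
};

int main() {
  alignas(Tracked) unsigned char Arena[sizeof(Tracked) * 4];
  std::vector<Tracked *> All;
  for (int I = 0; I < 4; ++I)
    All.push_back(new (Arena + I * sizeof(Tracked)) Tracked(I));

  // Mirror of `AA->~AbstractAttribute()` above: destruct explicitly; the
  // arena itself is released wholesale (here: automatic storage), never
  // per object.
  for (Tracked *T : All)
    T->~Tracked();
  return 0;
}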
+ for (AbstractAttribute *AA : AllAbstractAttributes) + AA->~AbstractAttribute(); + + for (auto &It : ArgumentReplacementMap) + DeleteContainerPointers(It.second); +} + bool Attributor::isAssumedDead(const AbstractAttribute &AA, const AAIsDead *FnLivenessAA, bool CheckBBLivenessOnly, DepClassTy DepClass) { @@ -8874,7 +8901,7 @@ const char AAValueConstantRange::ID = 0; #define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \ case IRPosition::PK: \ - AA = new CLASS##SUFFIX(IRP); \ + AA = new (A.Allocator) CLASS##SUFFIX(IRP); \ break; #define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 899e8b876a48..195916ead66a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -72,7 +72,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) { - I->setOperand(0, V2); + IC.replaceOperand(*I, 0, V2); MadeChange = true; } @@ -591,7 +591,7 @@ bool InstCombiner::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) { return false; // Change the div/rem to use 'Y' instead of the select. - I.setOperand(1, SI->getOperand(NonNullOperand)); + replaceOperand(I, 1, SI->getOperand(NonNullOperand)); // Okay, we know we replace the operand of the div/rem with 'Y' with no // problem. However, the select, or the condition of the select may have @@ -619,11 +619,11 @@ bool InstCombiner::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) { for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); I != E; ++I) { if (*I == SI) { - *I = SI->getOperand(NonNullOperand); + replaceUse(*I, SI->getOperand(NonNullOperand)); Worklist.push(&*BBI); } else if (*I == SelectCond) { - *I = NonNullOperand == 1 ? ConstantInt::getTrue(CondTy) - : ConstantInt::getFalse(CondTy); + replaceUse(*I, NonNullOperand == 1 ? ConstantInt::getTrue(CondTy) + : ConstantInt::getFalse(CondTy)); Worklist.push(&*BBI); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 887369e64852..15ecfe96c48d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2696,8 +2696,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // paths for the values (this helps GetUnderlyingObjects() for example). 
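The nested-select merges in the hunks below are pure Boolean identities, so they can be verified exhaustively. A small standalone check in plain C++, where sel models the IR select:

#include <cassert>

// The two visitSelectInst folds, checked over all boolean inputs:
//   sel(C0, sel(C1, A, B), B) == sel(C0 & C1, A, B)
//   sel(C0, A, sel(C1, A, B)) == sel(C0 | C1, A, B)
static int sel(bool C, int T, int F) { return C ? T : F; }

int main() {
  const int A = 1, B = 2;
  for (bool C0 : {false, true})
    for (bool C1 : {false, true}) {
      assert(sel(C0, sel(C1, A, B), B) == sel(C0 && C1, A, B));
      assert(sel(C0, A, sel(C1, A, B)) == sel(C0 || C1, A, B));
    }
  return 0;
}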
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition()); - SI.setOperand(0, And); - SI.setOperand(1, TrueSI->getTrueValue()); + replaceOperand(SI, 0, And); + replaceOperand(SI, 1, TrueSI->getTrueValue()); return &SI; } } @@ -2713,8 +2713,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition()); - SI.setOperand(0, Or); - SI.setOperand(2, FalseSI->getFalseValue()); + replaceOperand(SI, 0, Or); + replaceOperand(SI, 2, FalseSI->getFalseValue()); return &SI; } } @@ -2741,14 +2741,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { canMergeSelectThroughBinop(TrueBO)) { if (auto *TrueBOSI = dyn_cast(TrueBO->getOperand(0))) { if (TrueBOSI->getCondition() == CondVal) { - TrueBO->setOperand(0, TrueBOSI->getTrueValue()); + replaceOperand(*TrueBO, 0, TrueBOSI->getTrueValue()); Worklist.push(TrueBO); return &SI; } } if (auto *TrueBOSI = dyn_cast(TrueBO->getOperand(1))) { if (TrueBOSI->getCondition() == CondVal) { - TrueBO->setOperand(1, TrueBOSI->getTrueValue()); + replaceOperand(*TrueBO, 1, TrueBOSI->getTrueValue()); Worklist.push(TrueBO); return &SI; } @@ -2761,14 +2761,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { canMergeSelectThroughBinop(FalseBO)) { if (auto *FalseBOSI = dyn_cast(FalseBO->getOperand(0))) { if (FalseBOSI->getCondition() == CondVal) { - FalseBO->setOperand(0, FalseBOSI->getFalseValue()); + replaceOperand(*FalseBO, 0, FalseBOSI->getFalseValue()); Worklist.push(FalseBO); return &SI; } } if (auto *FalseBOSI = dyn_cast(FalseBO->getOperand(1))) { if (FalseBOSI->getCondition() == CondVal) { - FalseBO->setOperand(1, FalseBOSI->getFalseValue()); + replaceOperand(*FalseBO, 1, FalseBOSI->getFalseValue()); Worklist.push(FalseBO); return &SI; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 23370cf21f07..c3890ed69421 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -910,6 +910,18 @@ Value *InstCombiner::freelyNegateValue(Value *V) { return Builder.CreateSub( I->getOperand(1), I->getOperand(0), I->getName() + ".neg"); + // Negation is equivalent to bitwise-not + 1: + // 0 - (A ^ C) => ((A ^ C) ^ -1) + 1 => A ^ ~C + 1 + case Instruction::Xor: { + Constant *C; + if (match(I->getOperand(1), m_Constant(C))) { + Value *Xor = Builder.CreateXor(I->getOperand(0), ConstantExpr::getNot(C)); + return Builder.CreateAdd(Xor, ConstantInt::get(Xor->getType(), 1), + I->getName() + ".neg"); + } + return nullptr; + } + // 0-(A sdiv C) => A sdiv (0-C) provided the negation doesn't overflow. case Instruction::SDiv: { Constant *C = dyn_cast(I->getOperand(1)); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index d831b0da37a5..2a6ecfcd957a 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -947,6 +947,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo, // Loading the allocation -> undef. if (isa(DepInst) || isMallocLikeFn(DepInst, TLI) || + isAlignedAllocLikeFn(DepInst, TLI) || // Loading immediately after lifetime begin -> undef. 
isLifetimeStart(DepInst)) { Res = AvailableValue::get(UndefValue::get(LI->getType())); @@ -1451,7 +1452,7 @@ static bool impliesEquivalanceIfFalse(CmpInst* Cmp) { Value *LHS = Cmp->getOperand(0); Value *RHS = Cmp->getOperand(1); // If we can prove either side non-zero, then equality must imply - // equivalence. + // equivalence. // FIXME: We should do this optimization if 'no signed zeros' is // applicable via an instruction-level fast-math-flag or some other // indicator that relaxed FP semantics are being used. @@ -1516,10 +1517,10 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { // If we find an equality fact, canonicalize all dominated uses in this block // to one of the two values. We heuristically choice the "oldest" of the // two where age is determined by value number. (Note that propagateEquality - // above handles the cross block case.) - // + // above handles the cross block case.) + // // Key case to cover are: - // 1) + // 1) // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen // call void @llvm.assume(i1 %cmp) // ret float %0 ; will change it to ret float 3.000000e+00 @@ -1560,7 +1561,7 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { << *CmpLHS << " with " << *CmpRHS << " in block " << IntrinsicI->getParent()->getName() << "\n"); - + // Setup the replacement map - this handles uses within the same block if (hasUsersIn(CmpLHS, IntrinsicI->getParent())) @@ -1826,7 +1827,7 @@ void GVN::assignBlockRPONumber(Function &F) { bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const { bool Changed = false; for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { - Value *Operand = Instr->getOperand(OpNum); + Value *Operand = Instr->getOperand(OpNum); auto it = ReplaceOperandsWithMap.find(Operand); if (it != ReplaceOperandsWithMap.end()) { LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " @@ -1946,7 +1947,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, // If "A == B" is known true, or "A != B" is known false, then replace // A with B everywhere in the scope. For floating point operations, we - // have to be careful since equality does not always imply equivalance. + // have to be careful since equality does not always imply equivalance. 
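The "equality does not always imply equivalence" caveat above is, in the floating-point case, mostly about signed zeros: fcmp-style equality treats +0.0 and -0.0 as equal even though the two values are distinguishable, so GVN may not blindly substitute one for the other. A two-assert illustration in plain C++:

#include <cassert>
#include <cmath>

int main() {
  double PosZero = 0.0, NegZero = -0.0;
  assert(PosZero == NegZero);  // an oeq-style equality fact holds...
  // ...yet the values are distinguishable, e.g. by their sign bit, so
  // replacing one with the other can change observable results:
  assert(std::copysign(1.0, PosZero) != std::copysign(1.0, NegZero));
  return 0;
}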
if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) || (isKnownFalse && impliesEquivalanceIfFalse(Cmp))) Worklist.push_back(std::make_pair(Op0, Op1)); diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index d0b96218137c..5fee60fc6bf9 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -350,7 +350,7 @@ using ModelledPHISet = DenseSet>; class InstructionUseExpr : public GVNExpression::BasicExpression { unsigned MemoryUseOrder = -1; bool Volatile = false; - std::vector ShuffleMask; + ArrayRef ShuffleMask; public: InstructionUseExpr(Instruction *I, ArrayRecycler &R, @@ -360,6 +360,9 @@ class InstructionUseExpr : public GVNExpression::BasicExpression { setOpcode(I->getOpcode()); setType(I->getType()); + if (ShuffleVectorInst *SVI = dyn_cast(I)) + ShuffleMask = SVI->getShuffleMask().copy(A); + for (auto &U : I->uses()) op_push_back(U.getUser()); llvm::sort(op_begin(), op_end()); @@ -367,18 +370,15 @@ class InstructionUseExpr : public GVNExpression::BasicExpression { void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } void setVolatile(bool V) { Volatile = V; } - void setShuffleMask(ArrayRef Mask) { - ShuffleMask.assign(Mask.begin(), Mask.end()); - } hash_code getHashValue() const override { return hash_combine(GVNExpression::BasicExpression::getHashValue(), - MemoryUseOrder, Volatile, ArrayRef(ShuffleMask)); + MemoryUseOrder, Volatile, ShuffleMask); } template hash_code getHashValue(Function MapFn) { hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile, - ArrayRef(ShuffleMask)); + ShuffleMask); for (auto *V : operands()) H = hash_combine(H, MapFn(V)); return H; @@ -406,8 +406,6 @@ class ValueTable { CmpInst::Predicate Predicate = C->getPredicate(); E->setOpcode((C->getOpcode() << 8) | Predicate); } - if (ShuffleVectorInst *SVI = dyn_cast(I)) - E->setShuffleMask(SVI->getShuffleMask()); return E; } diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 327a1a6f2e7b..a9a0070c1d57 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -2833,6 +2833,16 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { /// select is not jump-threaded, it will be folded again in the later /// optimizations. bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { + // This transform can introduce a UB (a conditional branch that depends on a + // poison value) that was not present in the original program. See + // @TryToUnfoldSelectInCurrBB test in test/Transforms/JumpThreading/select.ll. + // Disable this transform under MemorySanitizer. + // FIXME: either delete it or replace with a valid transform. This issue is + // not limited to MemorySanitizer (but has only been observed as an MSan false + // positive in practice so far). + if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory)) + return false; + // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 17b2c534513d..b93a8bfeaa46 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1470,7 +1470,8 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, // undef value. 
This can happen when loading for a fresh allocation with no // intervening stores, for example. Note that this is only true in the case // that the result of the allocation is pointer equal to the load ptr. - if (isa(DepInst) || isMallocLikeFn(DepInst, TLI)) { + if (isa(DepInst) || isMallocLikeFn(DepInst, TLI) || + isAlignedAllocLikeFn(DepInst, TLI)) { return createConstantExpression(UndefValue::get(LoadType)); } // If this load occurs either right after a lifetime begin, diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 0c51fd5ff423..05025747db8e 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -662,7 +662,8 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { public: SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS) : PtrUseVisitor(DL), - AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {} + AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()), + AS(AS) {} private: void markAsDead(Instruction &I) { @@ -751,8 +752,10 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { // For array or vector indices, scale the index by the size of the // type. APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth()); - GEPOffset += Index * APInt(Offset.getBitWidth(), - DL.getTypeAllocSize(GTI.getIndexedType())); + GEPOffset += + Index * + APInt(Offset.getBitWidth(), + DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize()); } // If this index has computed an intermediate pointer which is not @@ -787,7 +790,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { LI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) return PI.setAborted(&LI); - uint64_t Size = DL.getTypeStoreSize(LI.getType()); + uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize(); return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } @@ -802,7 +805,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { SI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) return PI.setAborted(&SI); - uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize(); // If this memory access can be shown to *statically* extend outside the // bounds of the allocation, it's behavior is undefined, so simply @@ -1220,7 +1223,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { if (BBI->mayWriteToMemory()) return false; - uint64_t Size = DL.getTypeStoreSize(LI->getType()); + uint64_t Size = DL.getTypeStoreSize(LI->getType()).getFixedSize(); MaxAlign = std::max(MaxAlign, MaybeAlign(LI->getAlignment())); MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize; HaveLoad = true; @@ -1478,7 +1481,8 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, // extremely poorly defined currently. The long-term goal is to remove GEPing // over a vector from the IR completely. if (VectorType *VecTy = dyn_cast(Ty)) { - unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType()); + unsigned ElementSizeInBits = + DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize(); if (ElementSizeInBits % 8 != 0) { // GEPs over non-multiple of 8 size vector elements are invalid. 
return nullptr; @@ -1495,7 +1499,8 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, if (ArrayType *ArrTy = dyn_cast(Ty)) { Type *ElementTy = ArrTy->getElementType(); - APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); + APInt ElementSize(Offset.getBitWidth(), + DL.getTypeAllocSize(ElementTy).getFixedSize()); APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(ArrTy->getNumElements())) return nullptr; @@ -1517,7 +1522,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, unsigned Index = SL->getElementContainingOffset(StructOffset); Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); Type *ElementTy = STy->getElementType(Index); - if (Offset.uge(DL.getTypeAllocSize(ElementTy))) + if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize())) return nullptr; // The offset points into alignment padding. Indices.push_back(IRB.getInt32(Index)); @@ -1549,7 +1554,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, Type *ElementTy = Ty->getElementType(); if (!ElementTy->isSized()) return nullptr; // We can't GEP through an unsized element. - APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); + APInt ElementSize(Offset.getBitWidth(), + DL.getTypeAllocSize(ElementTy).getFixedSize()); if (ElementSize == 0) return nullptr; // Zero-length arrays can't help us build a natural GEP. APInt NumSkippedElements = Offset.sdiv(ElementSize); @@ -1716,7 +1722,8 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { return false; } - if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) + if (DL.getTypeSizeInBits(NewTy).getFixedSize() != + DL.getTypeSizeInBits(OldTy).getFixedSize()) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) return false; @@ -1889,7 +1896,8 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // Return if bitcast to vectors is different for total size in bits. if (!CandidateTys.empty()) { VectorType *V = CandidateTys[0]; - if (DL.getTypeSizeInBits(VTy) != DL.getTypeSizeInBits(V)) { + if (DL.getTypeSizeInBits(VTy).getFixedSize() != + DL.getTypeSizeInBits(V).getFixedSize()) { CandidateTys.clear(); return; } @@ -1935,7 +1943,8 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // they're all integer vectors. We sort by ascending number of elements. auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) { (void)DL; - assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) && + assert(DL.getTypeSizeInBits(RHSTy).getFixedSize() == + DL.getTypeSizeInBits(LHSTy).getFixedSize() && "Cannot have vector types of different sizes!"); assert(RHSTy->getElementType()->isIntegerTy() && "All non-integer types eliminated!"); @@ -1963,13 +1972,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // Try each vector type, and return the one which works. auto CheckVectorTypeForPromotion = [&](VectorType *VTy) { - uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType()); + uint64_t ElementSize = + DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize(); // While the definition of LLVM vectors is bitpacked, we don't support sizes // that aren't byte sized. 
if (ElementSize % 8) return false; - assert((DL.getTypeSizeInBits(VTy) % 8) == 0 && + assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 && "vector size not a multiple of element size?"); ElementSize /= 8; @@ -1999,7 +2009,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp) { - uint64_t Size = DL.getTypeStoreSize(AllocaTy); + uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedSize(); uint64_t RelBegin = S.beginOffset() - AllocBeginOffset; uint64_t RelEnd = S.endOffset() - AllocBeginOffset; @@ -2015,7 +2025,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (LI->isVolatile()) return false; // We can't handle loads that extend past the allocated memory. - if (DL.getTypeStoreSize(LI->getType()) > Size) + if (DL.getTypeStoreSize(LI->getType()).getFixedSize() > Size) return false; // So far, AllocaSliceRewriter does not support widening split slice tails // in rewriteIntegerLoad. @@ -2027,7 +2037,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (!isa(LI->getType()) && RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast(LI->getType())) { - if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy)) + if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize()) return false; } else if (RelBegin != 0 || RelEnd != Size || !canConvertValue(DL, AllocaTy, LI->getType())) { @@ -2040,7 +2050,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (SI->isVolatile()) return false; // We can't handle stores that extend past the allocated memory. - if (DL.getTypeStoreSize(ValueTy) > Size) + if (DL.getTypeStoreSize(ValueTy).getFixedSize() > Size) return false; // So far, AllocaSliceRewriter does not support widening split slice tails // in rewriteIntegerStore. @@ -2052,7 +2062,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, if (!isa(ValueTy) && RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast(ValueTy)) { - if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy)) + if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize()) return false; } else if (RelBegin != 0 || RelEnd != Size || !canConvertValue(DL, ValueTy, AllocaTy)) { @@ -2083,13 +2093,13 @@ static bool isIntegerWideningViableForSlice(const Slice &S, /// promote the resulting alloca. static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL) { - uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy); + uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedSize(); // Don't create integer types larger than the maximum bitwidth. if (SizeInBits > IntegerType::MAX_INT_BITS) return false; // Don't try to handle allocas with bit-padding. 
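The bit-padding bail-out this comment introduces (first hunk below) compares a type's size in bits with its store size in bits; the two diverge exactly when the width is not a multiple of 8 and DataLayout rounds the store size up to whole bytes. A tiny model of that round-up in plain C++ (storeSizeInBits is an illustrative stand-in, not the DataLayout API):

#include <cassert>
#include <cstdint>

// An iN whose width is not a multiple of 8 stores as ceil(N/8) bytes, so
// its size-in-bits and store-size-in-bits disagree and integer widening
// bails out.
static uint64_t storeSizeInBits(uint64_t TypeSizeInBits) {
  return ((TypeSizeInBits + 7) / 8) * 8;
}

int main() {
  assert(storeSizeInBits(32) == 32); // i32: no padding, widening viable
  assert(storeSizeInBits(17) == 24); // i17: 7 padding bits -> bail out
  assert(storeSizeInBits(1) == 8);   // i1
  return 0;
}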
- if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy)) + if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedSize()) return false; // We need to ensure that an integer type with the appropriate bitwidth can @@ -2128,11 +2138,13 @@ static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, const Twine &Name) { LLVM_DEBUG(dbgs() << " start: " << *V << "\n"); IntegerType *IntTy = cast(V->getType()); - assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && + assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <= + DL.getTypeStoreSize(IntTy).getFixedSize() && "Element extends past full value"); uint64_t ShAmt = 8 * Offset; if (DL.isBigEndian()) - ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); + ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() - + DL.getTypeStoreSize(Ty).getFixedSize() - Offset); if (ShAmt) { V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n"); @@ -2157,11 +2169,13 @@ static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, V = IRB.CreateZExt(V, IntTy, Name + ".ext"); LLVM_DEBUG(dbgs() << " extended: " << *V << "\n"); } - assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && + assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <= + DL.getTypeStoreSize(IntTy).getFixedSize() && "Element store outside of alloca store"); uint64_t ShAmt = 8 * Offset; if (DL.isBigEndian()) - ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); + ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() - + DL.getTypeStoreSize(Ty).getFixedSize() - Offset); if (ShAmt) { V = IRB.CreateShl(V, ShAmt, Name + ".shift"); LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n"); @@ -2324,18 +2338,20 @@ class llvm::sroa::AllocaSliceRewriter NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAI.getAllocatedType()), - IntTy(IsIntegerPromotable - ? Type::getIntNTy( - NewAI.getContext(), - DL.getTypeSizeInBits(NewAI.getAllocatedType())) - : nullptr), + IntTy( + IsIntegerPromotable + ? Type::getIntNTy(NewAI.getContext(), + DL.getTypeSizeInBits(NewAI.getAllocatedType()) + .getFixedSize()) + : nullptr), VecTy(PromotableVecTy), ElementTy(VecTy ? VecTy->getElementType() : nullptr), - ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0), + ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8 + : 0), PHIUsers(PHIUsers), SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { if (VecTy) { - assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 && + assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 && "Only multiple-of-8 sized vector elements are viable"); ++NumVectorized; } @@ -2500,7 +2516,8 @@ class llvm::sroa::AllocaSliceRewriter Type *TargetTy = IsSplit ? 
Type::getIntNTy(LI.getContext(), SliceSize * 8) : LI.getType(); - const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize; + const bool IsLoadPastEnd = + DL.getTypeStoreSize(TargetTy).getFixedSize() > SliceSize; bool IsPtrAdjusted = false; Value *V; if (VecTy) { @@ -2568,7 +2585,7 @@ class llvm::sroa::AllocaSliceRewriter assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); - assert(SliceSize < DL.getTypeStoreSize(LI.getType()) && + assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedSize() && "Split load isn't smaller than original load"); assert(DL.typeSizeEqualsStoreSize(LI.getType()) && "Non-byte-multiple bit width"); @@ -2626,7 +2643,8 @@ class llvm::sroa::AllocaSliceRewriter bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); - if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { + if (DL.getTypeSizeInBits(V->getType()).getFixedSize() != + IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, NewAI.getAlign(), "oldload"); Old = convertValue(DL, IRB, Old, IntTy); @@ -2661,7 +2679,7 @@ class llvm::sroa::AllocaSliceRewriter if (AllocaInst *AI = dyn_cast(V->stripInBoundsOffsets())) Pass.PostPromotionWorklist.insert(AI); - if (SliceSize < DL.getTypeStoreSize(V->getType())) { + if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedSize()) { assert(!SI.isVolatile()); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); @@ -2677,7 +2695,8 @@ class llvm::sroa::AllocaSliceRewriter if (IntTy && V->getType()->isIntegerTy()) return rewriteIntegerStore(V, SI, AATags); - const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize; + const bool IsStorePastEnd = + DL.getTypeStoreSize(V->getType()).getFixedSize() > SliceSize; StoreInst *NewSI; if (NewBeginOffset == NewAllocaBeginOffset && NewEndOffset == NewAllocaEndOffset && @@ -2792,7 +2811,7 @@ class llvm::sroa::AllocaSliceRewriter auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext()); auto *SrcTy = VectorType::get(Int8Ty, Len); return canConvertValue(DL, SrcTy, AllocaTy) && - DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)); + DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedSize()); }(); // If this doesn't map cleanly onto the alloca type, and that type isn't @@ -2826,8 +2845,8 @@ class llvm::sroa::AllocaSliceRewriter unsigned NumElements = EndIndex - BeginIndex; assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - Value *Splat = - getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ElementTy) / 8); + Value *Splat = getIntegerSplat( + II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8); Splat = convertValue(DL, IRB, Splat, ElementTy); if (NumElements > 1) Splat = getVectorSplat(Splat, NumElements); @@ -2860,7 +2879,8 @@ class llvm::sroa::AllocaSliceRewriter assert(NewBeginOffset == NewAllocaBeginOffset); assert(NewEndOffset == NewAllocaEndOffset); - V = getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ScalarTy) / 8); + V = getIntegerSplat(II.getValue(), + DL.getTypeSizeInBits(ScalarTy).getFixedSize() / 8); if (VectorType *AllocaVecTy = dyn_cast(AllocaTy)) V = getVectorSplat(V, AllocaVecTy->getNumElements()); @@ -2923,7 +2943,8 @@ class llvm::sroa::AllocaSliceRewriter bool EmitMemCpy = !VecTy && !IntTy && (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset || - SliceSize != 
DL.getTypeStoreSize(NewAI.getAllocatedType()) || + SliceSize != + DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedSize() || !NewAI.getAllocatedType()->isSingleValueType()); // If we're just going to emit a memcpy, the alloca hasn't changed, and the @@ -3469,8 +3490,8 @@ static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { if (Ty->isSingleValueType()) return Ty; - uint64_t AllocSize = DL.getTypeAllocSize(Ty); - uint64_t TypeSize = DL.getTypeSizeInBits(Ty); + uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedSize(); + uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedSize(); Type *InnerTy; if (ArrayType *ArrTy = dyn_cast(Ty)) { @@ -3483,8 +3504,8 @@ static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { return Ty; } - if (AllocSize > DL.getTypeAllocSize(InnerTy) || - TypeSize > DL.getTypeSizeInBits(InnerTy)) + if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedSize() || + TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedSize()) return Ty; return stripAggregateTypeWrapping(DL, InnerTy); @@ -3505,15 +3526,15 @@ static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { /// return a type if necessary. static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size) { - if (Offset == 0 && DL.getTypeAllocSize(Ty) == Size) + if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedSize() == Size) return stripAggregateTypeWrapping(DL, Ty); - if (Offset > DL.getTypeAllocSize(Ty) || - (DL.getTypeAllocSize(Ty) - Offset) < Size) + if (Offset > DL.getTypeAllocSize(Ty).getFixedSize() || + (DL.getTypeAllocSize(Ty).getFixedSize() - Offset) < Size) return nullptr; if (SequentialType *SeqTy = dyn_cast(Ty)) { Type *ElementTy = SeqTy->getElementType(); - uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); + uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize(); uint64_t NumSkippedElements = Offset / ElementSize; if (NumSkippedElements >= SeqTy->getNumElements()) return nullptr; @@ -3553,7 +3574,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, Offset -= SL->getElementOffset(Index); Type *ElementTy = STy->getElementType(Index); - uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); + uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize(); if (Offset >= ElementSize) return nullptr; // The offset points into alignment padding. @@ -4121,7 +4142,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Type *SliceTy = nullptr; const DataLayout &DL = AI.getModule()->getDataLayout(); if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset())) - if (DL.getTypeAllocSize(CommonUseTy) >= P.size()) + if (DL.getTypeAllocSize(CommonUseTy).getFixedSize() >= P.size()) SliceTy = CommonUseTy; if (!SliceTy) if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), @@ -4133,7 +4154,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, SliceTy = Type::getIntNTy(*C, P.size() * 8); if (!SliceTy) SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size()); - assert(DL.getTypeAllocSize(SliceTy) >= P.size()); + assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size()); bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); @@ -4274,7 +4295,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // to be rewritten into a partition. 
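Nearly all of the SROA churn in this file is mechanical fallout from DataLayout size queries returning a TypeSize, whose getFixedSize() asserts that the type is not scalable. A stripped-down model of that API shape, in plain C++ with a hypothetical stand-in class, shows why every call site now unwraps explicitly:

#include <cassert>
#include <cstdint>

// Minimal stand-in for the TypeSize idea used above: a size is a
// (quantity, scalable?) pair, and getFixedSize() is only legal when the
// size is not scalable (i.e. not an SVE-style vector).
class TypeSize {
  uint64_t Quantity;
  bool Scalable;
public:
  TypeSize(uint64_t Quantity, bool Scalable)
      : Quantity(Quantity), Scalable(Scalable) {}
  bool isScalable() const { return Scalable; }
  uint64_t getFixedSize() const {
    assert(!Scalable && "fixed size of a scalable type is unknown");
    return Quantity;
  }
};

int main() {
  TypeSize I32Size(4, /*Scalable=*/false);  // like i32
  TypeSize NxV4I32(16, /*Scalable=*/true);  // like <vscale x 4 x i32>
  assert(I32Size.getFixedSize() == 4);
  // NxV4I32.getFixedSize() would assert; SROA instead skips such allocas.
  assert(NxV4I32.isScalable());
  return 0;
}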
bool IsSorted = true; - uint64_t AllocaSize = DL.getTypeAllocSize(AI.getAllocatedType()); + uint64_t AllocaSize = + DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize(); const uint64_t MaxBitVectorSize = 1024; if (AllocaSize <= MaxBitVectorSize) { // If a byte boundary is included in any load or store, a slice starting or @@ -4338,7 +4360,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { Changed = true; if (NewAI != &AI) { uint64_t SizeOfByte = 8; - uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType()); + uint64_t AllocaSize = + DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedSize(); // Don't include any padding. uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte); Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size)); @@ -4358,7 +4381,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { auto *Expr = DbgDeclares.front()->getExpression(); auto VarSize = Var->getSizeInBits(); DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false); - uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType()); + uint64_t AllocaSize = + DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize(); for (auto Fragment : Fragments) { // Create a fragment expression describing the new partition or reuse AI's // expression if there is only one partition. @@ -4446,8 +4470,10 @@ bool SROA::runOnAlloca(AllocaInst &AI) { const DataLayout &DL = AI.getModule()->getDataLayout(); // Skip alloca forms that this analysis can't handle. - if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() || - DL.getTypeAllocSize(AI.getAllocatedType()) == 0) + auto *AT = AI.getAllocatedType(); + if (AI.isArrayAllocation() || !AT->isSized() || + (isa(AT) && cast(AT)->isScalable()) || + DL.getTypeAllocSize(AT).getFixedSize() == 0) return false; bool Changed = false; @@ -4567,8 +4593,15 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); I != E; ++I) { - if (AllocaInst *AI = dyn_cast(I)) - Worklist.insert(AI); + if (AllocaInst *AI = dyn_cast(I)) { + if (isa(AI->getAllocatedType()) && + cast(AI->getAllocatedType())->isScalable()) { + if (isAllocaPromotable(AI)) + PromotableAllocas.push_back(AI); + } else { + Worklist.insert(AI); + } + } } bool Changed = false; diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 0eca6704b496..deff56b9e27e 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -522,7 +522,7 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, // sext(a + b) = sext(a) + sext(b) // even if the addition is not marked nsw. // - // Leveraging this invarient, we can trace into an sext'ed inbound GEP + // Leveraging this invariant, we can trace into an sext'ed inbound GEP // index if the constant offset is non-negative. // // Verified in @sext_add in split-gep.ll. @@ -552,6 +552,9 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO, bool SignExtended, bool ZeroExtended) { + // Save off the current height of the chain, in case we need to restore it. + size_t ChainLength = UserChain.size(); + // BO being non-negative does not shed light on whether its operands are // non-negative. Clear the NonNegative flag here. 
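The save/restore being added here (hunk continues below) is ordinary backtracking discipline: whatever the search pushed onto UserChain while exploring an operand that yielded no constant offset must be unwound before the other operand is tried. The same pattern in a self-contained plain-C++ sketch (a hypothetical tree search, not the pass itself):

#include <cassert>
#include <vector>

// A recursive search records its path in a shared chain; a failed branch
// shrinks the chain back to its pre-exploration height before trying the
// next child, exactly the resize-to-saved-length fix above.
static bool findLeaf(const std::vector<std::vector<int>> &Children, int Node,
                     int Target, std::vector<int> &Chain) {
  Chain.push_back(Node);
  if (Node == Target)
    return true;
  size_t ChainLength = Chain.size(); // saved height
  for (int Child : Children[Node]) {
    if (findLeaf(Children, Child, Target, Chain))
      return true;
    Chain.resize(ChainLength); // this branch didn't pan out: backtrack
  }
  Chain.pop_back();
  return false;
}

int main() {
  // Node 0 -> {1, 2}, node 1 -> {3}; nodes 2 and 3 are leaves.
  std::vector<std::vector<int>> Children = {{1, 2}, {3}, {}, {}};
  std::vector<int> Chain;
  assert(findLeaf(Children, 0, 2, Chain));
  assert((Chain == std::vector<int>{0, 2})); // 1 and 3 were unwound
  return 0;
}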
APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended, @@ -562,12 +565,22 @@ APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO, // However, such cases are probably already handled by -instcombine, // given this pass runs after the standard optimizations. if (ConstantOffset != 0) return ConstantOffset; + + // Reset the chain back to where it was when we started exploring this node, + // since visiting the LHS didn't pan out. + UserChain.resize(ChainLength); + ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended, /* NonNegative */ false); // If U is a sub operator, negate the constant offset found in the right // operand. if (BO->getOpcode() == Instruction::Sub) ConstantOffset = -ConstantOffset; + + // If RHS wasn't a suitable candidate either, reset the chain again. + if (ConstantOffset == 0) + UserChain.resize(ChainLength); + return ConstantOffset; } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 36d05345f23b..b8b3d1895093 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -80,21 +80,11 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); -static cl::opt UpdateReturnAttributes( - "update-return-attrs", cl::init(true), cl::Hidden, - cl::desc("Update return attributes on calls within inlined body")); - static cl::opt PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::init(true), cl::Hidden, cl::desc("Convert align attributes to assumptions during inlining.")); -static cl::opt InlinerAttributeWindow( - "inliner-attribute-window", cl::Hidden, - cl::desc("the maximum number of instructions analyzed for may throw during " - "attribute inference in inlined body"), - cl::init(4)); - llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { @@ -1146,81 +1136,6 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, } } -static bool MayContainThrowingOrExitingCall(Instruction *Begin, - Instruction *End) { - - assert(Begin->getParent() == End->getParent() && - "Expected to be in same basic block!"); - unsigned NumInstChecked = 0; - // Check that all instructions in the range [Begin, End) are guaranteed to - // transfer execution to successor. - for (auto &I : make_range(Begin->getIterator(), End->getIterator())) - if (NumInstChecked++ > InlinerAttributeWindow || - !isGuaranteedToTransferExecutionToSuccessor(&I)) - return true; - return false; -} - -static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) { - if (!UpdateReturnAttributes) - return; - AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex); - if (AB.empty()) - return; - - auto *CalledFunction = CS.getCalledFunction(); - auto &Context = CalledFunction->getContext(); - - for (auto &BB : *CalledFunction) { - auto *RI = dyn_cast(BB.getTerminator()); - if (!RI || !isa(RI->getOperand(0))) - continue; - // Sanity check that the cloned return instruction exists and is a return - // instruction itself. - auto *NewRI = dyn_cast_or_null(VMap.lookup(RI)); - if (!NewRI) - continue; - auto *RetVal = cast(RI->getOperand(0)); - // Sanity check that the cloned RetVal exists and is a call. - // Simplification during inlining could have transformed the cloned - // instruction. 
- auto *NewRetVal = dyn_cast_or_null(VMap.lookup(RetVal)); - if (!NewRetVal) - continue; - // Backward propagation of attributes to the returned value may be incorrect - // if it is control flow dependent. - // Consider: - // @callee { - // %rv = call @foo() - // %rv2 = call @bar() - // if (%rv2 != null) - // return %rv2 - // if (%rv == null) - // exit() - // return %rv - // } - // caller() { - // %val = call nonnull @callee() - // } - // Here we cannot add the nonnull attribute on either foo or bar. So, we - // limit the check to both NewRetVal and NewRI are in the same basic block - // and there are no throwing/exiting instructions between these - // instructions. - if (NewRI->getParent() != NewRetVal->getParent() || - MayContainThrowingOrExitingCall(NewRetVal, NewRI)) - continue; - // Add to the existing attributes of NewRetVal. - // NB! When we have the same attribute already existing on NewRetVal, but - // with a differing value, the AttributeList's merge API honours the already - // existing attribute value (i.e. attributes such as dereferenceable, - // dereferenceable_or_null etc). See AttrBuilder::merge for more details. - AttributeList AL = NewRetVal->getAttributes(); - AttributeList NewAL = - AL.addAttributes(Context, AttributeList::ReturnIndex, AB); - NewRetVal->setAttributes(NewAL); - } -} - /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { @@ -1886,10 +1801,6 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Add noalias metadata if necessary. AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); - // Clone return attributes on the callsite into the calls within the inlined - // function which feed into its return value. - AddReturnAttributes(CS, VMap); - // Propagate llvm.mem.parallel_loop_access if necessary. 
   PropagateParallelLoopAccessMetadata(CS, VMap);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 77fe6c1cb12a..10eb1212a591 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -832,6 +832,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
         B.CreateICmp(Old->getPredicate(), StrNCmp,
                      ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
     replaceAllUsesWith(Old, Cmp);
+    eraseFromParent(Old);
   }
   return CI;
 }
@@ -2170,8 +2171,10 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
   auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
                                  Value *Res) {
-    for (CallInst *C : Calls)
+    for (CallInst *C : Calls) {
       replaceAllUsesWith(C, Res);
+      eraseFromParent(C);
+    }
   };
 
   replaceTrigInsts(SinCalls, Sin);
diff --git a/llvm/test/CodeGen/AArch64/funclet-match-add-sub-stack.ll b/llvm/test/CodeGen/AArch64/funclet-match-add-sub-stack.ll
new file mode 100644
index 000000000000..67e9c49675cf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/funclet-match-add-sub-stack.ll
@@ -0,0 +1,62 @@
+; RUN: llc -o - %s -mtriple=aarch64-windows | FileCheck %s
+; Check that the stack bump around a funclet is computed correctly in both the
+; prologue and epilogue in the case where we have a MaxCallFrameSize > 0 and are doing an alloca
+target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-pc-windows-msvc19.25.28611"
+
+; // requires passing arguments on the stack
+; void test2(void*, int, int, int, int, int, int, int, int);
+;
+; // function with the funclet being checked
+; void test1(size_t bytes)
+; {
+;   // alloca forces a separate callee save bump and stack bump
+;   void *data = _alloca(bytes);
+;   try {
+;     test2(data, 0, 1, 2, 3, 4, 5, 6, 7);
+;   } catch (...) {
+;     // the funclet being checked
+;   }
+; }
+
+; CHECK-LABEL: ?catch$2@?0??test1@@YAX_K@Z@4HA
+; CHECK: sub sp, sp, #16
+; CHECK: add sp, sp, #16
+; Function Attrs: uwtable
+define dso_local void @"?test1@@YAX_K@Z"(i64 %0) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+  %2 = alloca i64, align 8
+  %3 = alloca i8*, align 8
+  store i64 %0, i64* %2, align 8
+  %4 = load i64, i64* %2, align 8
+  %5 = alloca i8, i64 %4, align 16
+  store i8* %5, i8** %3, align 8
+  %6 = load i8*, i8** %3, align 8
+  invoke void @"?test2@@YAXPEAXHHHHHHHH@Z"(i8* %6, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+          to label %13 unwind label %7
+
+7:                                                ; preds = %1
+  %8 = catchswitch within none [label %9] unwind to caller
+
+9:                                                ; preds = %7
+  %10 = catchpad within %8 [i8* null, i32 64, i8* null]
+  catchret from %10 to label %11
+
+11:                                               ; preds = %9
+  br label %12
+
+12:                                               ; preds = %11, %13
+  ret void
+
+13:                                               ; preds = %1
+  br label %12
+}
+
+declare dso_local void @"?test2@@YAXPEAXHHHHHHHH@Z"(i8*, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+declare dso_local i32 @__CxxFrameHandler3(...)
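+; Note: test2 takes nine register-sized arguments, so on AArch64 one of them
+; is passed on the stack. The 16 bytes checked above are presumably that
+; outgoing-argument area (8 bytes rounded up to the 16-byte stack alignment),
+; which the funclet must both allocate and deallocate itself.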
+ +attributes #0 = { uwtable } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"wchar_size", i32 2} diff --git a/llvm/test/CodeGen/AArch64/seh-finally.ll b/llvm/test/CodeGen/AArch64/seh-finally.ll index 66558c90a79c..dbc6c4b0804b 100644 --- a/llvm/test/CodeGen/AArch64/seh-finally.ll +++ b/llvm/test/CodeGen/AArch64/seh-finally.ll @@ -37,7 +37,7 @@ entry: ; CHECK-LABEL: simple_seh ; CHECK: add x29, sp, #16 ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x29, #-16] +; CHECK: stur x0, [x29, #16] ; CHECK: .set .Lsimple_seh$frame_escape_0, -8 ; CHECK: ldur w0, [x29, #-8] ; CHECK: bl foo @@ -87,13 +87,13 @@ define void @stack_realign() #0 personality i8* bitcast (i32 (...)* @__C_specifi entry: ; CHECK-LABEL: stack_realign ; CHECK: mov x29, sp -; CHECK: sub x9, sp, #64 +; CHECK: sub x9, sp, #16 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x19, #16] -; CHECK: .set .Lstack_realign$frame_escape_0, 32 -; CHECK: ldr w0, [x19, #32] +; CHECK: stur x0, [x29, #32] +; CHECK: .set .Lstack_realign$frame_escape_0, 0 +; CHECK: ldr w0, [x19] ; CHECK: bl foo %o = alloca %struct.S, align 32 @@ -142,7 +142,7 @@ entry: ; CHECK-LABEL: vla_present ; CHECK: add x29, sp, #32 ; CHECK: mov x1, #-2 -; CHECK: stur x1, [x29, #-32] +; CHECK: stur x1, [x29, #16] ; CHECK: .set .Lvla_present$frame_escape_0, -4 ; CHECK: stur w0, [x29, #-4] ; CHECK: ldur w8, [x29, #-4] @@ -206,17 +206,17 @@ define void @vla_and_realign(i32 %n) #0 personality i8* bitcast (i32 (...)* @__C entry: ; CHECK-LABEL: vla_and_realign ; CHECK: mov x29, sp -; CHECK: sub x9, sp, #64 +; CHECK: sub x9, sp, #48 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; CHECK: mov x1, #-2 -; CHECK: stur x1, [x19] +; CHECK: stur x1, [x29, #32] ; CHECK: .set .Lvla_and_realign$frame_escape_0, 32 -; CHECK: str w0, [x29, #28] -; CHECK: ldr w8, [x29, #28] +; CHECK: str w0, [x29, #44] +; CHECK: ldr w8, [x29, #44] ; CHECK: mov x9, sp -; CHECK: str x9, [x19, #24] -; CHECK: str x8, [x19, #16] +; CHECK: str x9, [x29, #24] +; CHECK: str x8, [x19, #24] ; CHECK: ldr w0, [x19, #32] ; CHECK: bl foo diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll index d84c07f8bc1a..cbed64ab99e3 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll @@ -4,11 +4,10 @@ ; but the original issue only reproduced if the cbz was immediately ; after the frame setup.) -; CHECK: sub sp, sp, #32 -; CHECK-NEXT: stp x29, x30, [sp, #16] -; CHECK-NEXT: add x29, sp, #16 +; CHECK: stp x29, x30, [sp, #-32]! +; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: mov x1, #-2 -; CHECK-NEXT: stur x1, [x29, #-16] +; CHECK-NEXT: stur x1, [x29, #16] ; CHECK-NEXT: cbz w0, .LBB0_2 target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll index b10a0f3033a0..a66ec10748e7 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll @@ -12,7 +12,7 @@ ; CHECK: stp x29, x30, [sp, #-32]! 
; CHECK-NEXT: str x28, [sp, #16] ; CHECK-NEXT: str x19, [sp, #24] -; CHECK-NEXT: add x0, x19, #64 +; CHECK-NEXT: add x0, x19, #0 ; CHECK-NEXT: mov w1, wzr ; CHECK-NEXT: bl "?bb@@YAXPEAHH@Z" ; CHECK-NEXT: adrp x0, .LBB0_1 diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll index 3ae2df37efe4..73909825d377 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll @@ -11,11 +11,11 @@ ; and the parent function. ; The following checks that the unwind help object has -2 stored into it at -; fp - 400 - 256 = fp - 656, which is on-entry sp - 48 + 32 - 656 = -; on-entry sp - 672. We check this offset in the table later on. +; fp + 16, which is on-entry sp - 16. +; We check this offset in the table later on. ; CHECK-LABEL: "?func@@YAHXZ": -; CHECK: stp x29, x30, [sp, #-48]! +; CHECK: stp x29, x30, [sp, #-64]! ; CHECK: str x28, [sp, #16] ; CHECK: str x21, [sp, #24] ; CHECK: stp x19, x20, [sp, #32] @@ -23,7 +23,7 @@ ; CHECK: sub sp, sp, #624 ; CHECK: mov x19, sp ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x19] +; CHECK: stur x0, [x29, #48] ; Now check that x is stored at fp - 20. We check that this is the same ; location accessed from the funclet to retrieve x. @@ -72,7 +72,7 @@ ; Now check that the offset of the unwind help object from the stack pointer on ; entry to func is encoded in cppxdata that is passed to __CxxFrameHandler3. As -; computed above, this comes to -672. +; computed above, this comes to -16. ; CHECK-LABEL: "$cppxdata$?func@@YAHXZ": ; CHECK-NEXT: .word 429065506 ; MagicNumber ; CHECK-NEXT: .word 2 ; MaxState @@ -81,7 +81,7 @@ ; CHECK-NEXT: .word ("$tryMap$?func@@YAHXZ")@IMGREL ; TryBlockMap ; CHECK-NEXT: .word 4 ; IPMapEntries ; CHECK-NEXT: .word ("$ip2state$?func@@YAHXZ")@IMGREL ; IPToStateXData -; CHECK-NEXT: .word -672 ; UnwindHelp +; CHECK-NEXT: .word -16 ; UnwindHelp ; UNWIND: Function: ?func@@YAHXZ (0x0) ; UNWIND: Prologue [ @@ -91,7 +91,7 @@ ; UNWIND-NEXT: ; stp x19, x20, [sp, #32] ; UNWIND-NEXT: ; str x21, [sp, #24] ; UNWIND-NEXT: ; str x28, [sp, #16] -; UNWIND-NEXT: ; stp x29, x30, [sp, #-48]! +; UNWIND-NEXT: ; stp x29, x30, [sp, #-64]! 
; UNWIND-NEXT: ; end ; UNWIND: Function: ?catch$2@?0??func@@YAHXZ@4HA ; UNWIND: Prologue [ diff --git a/llvm/test/CodeGen/AArch64/wineh-unwindhelp-via-fp.ll b/llvm/test/CodeGen/AArch64/wineh-unwindhelp-via-fp.ll new file mode 100644 index 000000000000..6ec78087020c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh-unwindhelp-via-fp.ll @@ -0,0 +1,69 @@ +; RUN: llc -o - %s -mtriple=aarch64-windows | FileCheck %s +; Check that we allocate the unwind help stack object in a fixed location from fp +; so that the runtime can find it when handling an exception +target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-pc-windows-msvc19.25.28611" + +; Check that the store to the unwind help object for func2 is via FP +; CHECK-LABEL: ?func2@@YAXXZ +; CHECK: mov x[[#SCRATCH_REG:]], #-2 +; CHECK: stur x[[#SCRATCH_REG:]], [x29, #[[#]]] +; +; // struct that requires greater than stack alignment +; struct alignas(32) A +; { +; // data that would be invalid for unwind help (> 0) +; int _x[4]{42, 42, 42, 42}; +; ~A() {} +; }; +; +; // cause us to run the funclet in func2 +; void func3() +; { +; throw 1; +; } +; +; // the funclet that ensures we have the unwind help correct +; void func2() +; { +; A a; +; func3(); +; } +; +; // function to ensure we are misaligned in func2 +; void func1() +; { +; func2(); +; } +; +; // set things up and ensure alignment for func1 +; void test() +; { +; try { +; A a; +; func1(); +; } catch(...) {} +; } + +%struct.A = type { [4 x i32], [16 x i8] } +declare dso_local %struct.A* @"??0A@@QEAA@XZ"(%struct.A* returned %0) +declare dso_local void @"??1A@@QEAA@XZ"(%struct.A* %0) +declare dso_local i32 @__CxxFrameHandler3(...) +declare dso_local void @"?func3@@YAXXZ"() + +; Function Attrs: noinline optnone uwtable +define dso_local void @"?func2@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { + %1 = alloca %struct.A, align 32 + %2 = call %struct.A* @"??0A@@QEAA@XZ"(%struct.A* %1) #3 + invoke void @"?func3@@YAXXZ"() + to label %3 unwind label %4 + +3: ; preds = %0 + call void @"??1A@@QEAA@XZ"(%struct.A* %1) #3 + ret void + +4: ; preds = %0 + %5 = cleanuppad within none [] + call void @"??1A@@QEAA@XZ"(%struct.A* %1) #3 [ "funclet"(token %5) ] + cleanupret from %5 unwind to caller +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir index 2e96ee4e881f..441eec00d5e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir @@ -396,10 +396,10 @@ body: | ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %6(s16) - ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] + ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fmad_s16_denorm ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -457,10 +457,10 @@ body: | ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %6(s16) - ; GFX7: $vgpr0 = COPY 
[[ANYEXT]](s32) ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC2]] + ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fmad_s16_denorm_flags ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -550,16 +550,16 @@ body: | ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %10(s16) - ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %11(s16) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC4]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC5]] + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX7: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] - ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC5]] - ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] - ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC4]] ; GFX10-LABEL: name: test_fmad_v2s16_denorm ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -659,16 +659,16 @@ body: | ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %10(s16) - ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %11(s16) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC2]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC4]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC5]] + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX7: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX7: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] - ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC5]] - ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC2]] - ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC4]] ; GFX10-LABEL: name: test_fmad_v2s16_denorm_flags ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -820,26 +820,26 @@ body: | ; GFX7: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX7: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %16(s16) - ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %17(s16) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = 
G_FADD [[FMUL]], [[TRUNC8]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC9]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] + ; GFX7: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] + ; GFX7: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX7: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT %18(s16) - ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT %19(s16) + ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; GFX7: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX7: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC11]] - ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] - ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC10]] - ; GFX7: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] - ; GFX7: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC9]] - ; GFX7: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] - ; GFX7: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC8]] ; GFX10-LABEL: name: test_fmad_v4s16_denorm ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -1015,26 +1015,26 @@ body: | ; GFX7: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX7: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %16(s16) - ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT %17(s16) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC4]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC8]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC5]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC9]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC2]], [[TRUNC6]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL2]], [[TRUNC10]] + ; GFX7: [[FMUL3:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] + ; GFX7: [[FADD3:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL3]], [[TRUNC11]] + ; GFX7: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16) + ; GFX7: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) ; GFX7: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX7: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; GFX7: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT %18(s16) - ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT %19(s16) + ; GFX7: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16) + ; GFX7: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) ; GFX7: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; GFX7: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; GFX7: 
[[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] - ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC11]] - ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC2]], [[TRUNC6]] - ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC10]] - ; GFX7: [[FMUL2:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC5]] - ; GFX7: [[FADD2:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL2]], [[TRUNC9]] - ; GFX7: [[FMUL3:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC4]] - ; GFX7: [[FADD3:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL3]], [[TRUNC8]] ; GFX10-LABEL: name: test_fmad_v4s16_denorm_flags ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir index 32738d499bda..95515f3593ac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir @@ -252,23 +252,23 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6: $vgpr0 = COPY %3(s32) ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX6: $vgpr0 = COPY [[FADD]](s32) ; GFX7-LABEL: name: test_fmad_s32_denorm ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7: $vgpr0 = COPY %3(s32) ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX7: $vgpr0 = COPY [[FADD]](s32) ; GFX10-LABEL: name: test_fmad_s32_denorm ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: $vgpr0 = COPY %3(s32) ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -291,23 +291,23 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX6: $vgpr0 = COPY %3(s32) ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] ; GFX6: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + ; GFX6: $vgpr0 = COPY [[FADD]](s32) ; GFX7-LABEL: name: test_fmad_s32_flags_denorm ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX7: $vgpr0 = COPY %3(s32) ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] ; GFX7: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + ; GFX7: $vgpr0 = COPY [[FADD]](s32) ; GFX10-LABEL: name: test_fmad_s32_flags_denorm ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: $vgpr0 = COPY %3(s32) ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = nnan 
G_FADD [[FMUL]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -333,12 +333,12 @@ body: | ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]] - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]] ; GFX7-LABEL: name: test_fmad_v2s32_denorm ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -346,12 +346,12 @@ body: | ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32) + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]] - ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]] ; GFX10-LABEL: name: test_fmad_v2s32_denorm ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -359,12 +359,12 @@ body: | ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; GFX10: [[FADD1:%[0-9]+]]:_(s32) 
= G_FADD [[FMUL1]], [[UV4]] %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -390,14 +390,14 @@ body: | ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]] + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX7-LABEL: name: test_fmad_v3s32_denorm ; GFX7: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX7: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -405,14 +405,14 @@ body: | ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; GFX7: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; GFX7: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32) - ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX10-LABEL: name: test_fmad_v3s32_denorm ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -420,14 +420,14 @@ body: | ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX10: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32) - ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]] ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]] + ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -453,16 +453,16 @@ body: | ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]] - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]] - ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]] - ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]] ; GFX7-LABEL: name: test_fmad_v4s32_denorm ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 @@ -470,16 +470,16 @@ body: | ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) ; GFX7: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), 
%19(s32) + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX7: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]] - ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]] - ; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]] - ; GFX7: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]] ; GFX10-LABEL: name: test_fmad_v4s32_denorm ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 @@ -487,16 +487,16 @@ body: | ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]] + ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]] + ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]] - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]] - ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]] %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir index 6230a3728273..d9204693d518 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -301,14 +301,18 @@ body: | ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) @@ -443,10 +447,6 @@ body: | ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) - ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX8-LABEL: name: test_udiv_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -454,14 +454,18 @@ body: | ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8: 
[[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) @@ -596,10 +600,6 @@ body: | ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) - ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] ; GFX9-LABEL: name: test_udiv_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -607,14 +607,18 @@ body: | ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32) ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32) + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32) + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) @@ -749,10 +753,6 @@ body: | ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]] ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]] ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT3]](s64) - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]] - ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]] %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UDIV %0, %1 @@ -774,14 +774,18 @@ body: | ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6: 
[[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32) + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) @@ -919,14 +923,18 @@ body: | ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]] - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]] + ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) @@ -1059,14 +1067,6 @@ body: | ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] - ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]] - ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] - ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], 
[[C]] - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] ; GFX8-LABEL: name: test_udiv_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 @@ -1076,14 +1076,18 @@ body: | ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32) + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32) + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) @@ -1221,14 +1225,18 @@ body: | ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]] + ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32) + ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) @@ -1361,14 +1369,6 @@ body: | ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) 
= G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]] ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]] - ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]] - ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]] - ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]] ; GFX9-LABEL: name: test_udiv_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 @@ -1378,14 +1378,18 @@ body: | ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32) ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %250(s32) + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]] - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]] + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 - ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %258(s32) + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64) @@ -1523,14 +1527,18 @@ body: | ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV22]](s32) ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV23]](s32) ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 - ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32) + ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 - ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C9]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]] + ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]] + ; GFX9: 
[[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
 ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
+ ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+ ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
 ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
 ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
 ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
@@ -1663,14 +1671,6 @@ body: |
 ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
- ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]]
- ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
- ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]]
- ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
- ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
- ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
- ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
- ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
 %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
 %2:_(<2 x s64>) = G_UDIV %0, %1
@@ -2300,14 +2300,18 @@ body: |
 ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+ ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+ ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
 ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
- ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+ ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+ ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
@@ -2443,10 +2447,6 @@ body: |
 ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
- ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
- ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
- ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 ; GFX8-LABEL: name: test_udiv_s33
 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -2459,14 +2459,18 @@ body: |
 ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+ ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+ ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
 ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
- ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+ ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+ ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
@@ -2602,10 +2606,6 @@ body: |
 ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
 ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
- ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
- ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
- ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 ; GFX9-LABEL: name: test_udiv_s33
 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -2618,14 +2618,18 @@ body: |
 ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+ ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+ ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
 ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
- ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+ ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+ ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
@@ -2761,10 +2765,6 @@ body: |
 ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
 ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
- ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
- ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
- ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 %0:_(s64) = COPY $vgpr0_vgpr1
 %1:_(s64) = COPY $vgpr2_vgpr3
 %2:_(s33) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
index dfc52a8a7ce5..e42fe1400477 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir
@@ -295,14 +295,18 @@ body: |
 ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+ ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+ ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+ ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
- ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+ ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+ ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+ ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
@@ -430,10 +434,6 @@ body: |
 ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
 ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
- ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
- ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
- ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 ; GFX8-LABEL: name: test_urem_s64
 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -441,14 +441,18 @@ body: |
 ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+ ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+ ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+ ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
- ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+ ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+ ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+ ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
@@ -576,10 +580,6 @@ body: |
 ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
 ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
- ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
- ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
- ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 ; GFX9-LABEL: name: test_urem_s64
 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -587,14 +587,18 @@ body: |
 ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %8(s32)
+ ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+ ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+ ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
 ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
- ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+ ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %16(s32)
+ ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+ ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
@@ -722,10 +726,6 @@ body: |
 ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C5]]
 ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT3]](s64)
- ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
- ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
- ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 %0:_(s64) = COPY $vgpr0_vgpr1
 %1:_(s64) = COPY $vgpr2_vgpr3
 %2:_(s64) = G_UREM %0, %1
@@ -747,14 +747,18 @@ body: |
 ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
 ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
 ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32)
+ ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+ ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+ ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
- ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+ ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32)
+ ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+ ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
@@ -885,14 +889,18 @@ body: |
 ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32)
 ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32)
 ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
+ ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+ ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
 ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
+ ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
 ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]]
- ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+ ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C8]]
+ ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
 ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
+ ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+ ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
 ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
 ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
 ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
@@ -1019,14 +1027,6 @@ body: |
 ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
 ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
 ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
- ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
- ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
- ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
- ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
- ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
- ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
- ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
- ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
 ; GFX8-LABEL: name: test_urem_v2s64
 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -1036,14 +1036,18 @@ body: |
 ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
 ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
 ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32)
+ ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+ ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+ ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
- ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+ ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32)
+ ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+ ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
@@ -1174,14 +1178,18 @@ body: |
 ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32)
 ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32)
 ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
+ ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+ ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
 ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
+ ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
 ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]]
- ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+ ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C8]]
+ ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
 ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
+ ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+ ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
 ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
 ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
 ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
@@ -1308,14 +1316,6 @@ body: |
 ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
 ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
 ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
- ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
- ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
- ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
- ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
- ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
- ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
- ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
- ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
 ; GFX9-LABEL: name: test_urem_v2s64
 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -1325,14 +1325,18 @@ body: |
 ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV4]](s32)
 ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV5]](s32)
 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %234(s32)
+ ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
+ ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
+ ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
 ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C2]]
- ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C2]]
+ ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %242(s32)
+ ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
+ ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
@@ -1463,14 +1467,18 @@ body: |
 ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV18]](s32)
 ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV19]](s32)
 ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
+ ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]]
+ ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32)
 ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
+ ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C7]]
 ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[C8]]
- ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL3]]
+ ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C8]]
+ ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]]
 ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
+ ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]]
+ ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32)
 ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32)
 ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
 ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
@@ -1597,14 +1605,6 @@ body: |
 ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV3]]
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT3]](s64), [[SELECT7]](s64)
 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
- ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C9]]
- ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[FMUL2]]
- ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C6]]
- ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL5]], [[UITOFP2]]
- ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C3]]
- ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL]]
- ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C]]
- ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[UITOFP]]
 %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
 %2:_(<2 x s64>) = G_UREM %0, %1
@@ -2222,14 +2222,18 @@ body: |
 ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+ ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+ ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
 ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
- ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+ ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+ ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
@@ -2358,10 +2362,6 @@ body: |
 ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
- ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
- ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
- ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 ; GFX8-LABEL: name: test_urem_s33
 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -2374,14 +2374,18 @@ body: |
 ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+ ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+ ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
 ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
- ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+ ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+ ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
@@ -2510,10 +2514,6 @@ body: |
 ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
 ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
- ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
- ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
- ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 ; GFX9-LABEL: name: test_urem_s33
 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -2526,14 +2526,18 @@ body: |
 ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV]](s32)
 ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV1]](s32)
 ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000
- ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG %14(s32)
+ ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
+ ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]]
+ ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32)
 ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000
- ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+ ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
 ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000
- ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[C3]]
- ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL1]]
+ ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]]
+ ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]]
 ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000
- ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI %22(s32)
+ ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
+ ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]]
+ ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32)
 ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32)
 ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
@@ -2662,10 +2666,6 @@ body: |
 ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV]]
 ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SELECT3]](s64)
 ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
- ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]]
- ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL]]
- ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]]
- ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UITOFP]]
 %0:_(s64) = COPY $vgpr0_vgpr1
 %1:_(s64) = COPY $vgpr2_vgpr3
 %2:_(s33) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll
index f6d16e87dd02..d200f7c5c306 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll
@@ -6,20 +6,17 @@ define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
 ; GFX9-LABEL: sample_l_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
-; GFX9-NEXT: s_lshl_b32 s12, s0, 16
 ; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
 ; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
 ; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
 ; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
 ; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
 ; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_and_or_b32 v0, v0, v1, s12
 ; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: ; return to shader part epilog
@@ -27,19 +24,17 @@ define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
 ; GFX10-LABEL: sample_l_1d:
 ; GFX10: ; %bb.0: ; %main_body
 ; GFX10-NEXT: s_mov_b32 s0, s2
-; GFX10-NEXT: s_mov_b32 s2, s4
-; GFX10-NEXT: s_mov_b32 s4, s6
-; GFX10-NEXT: s_mov_b32 s6, s8
-; GFX10-NEXT: s_mov_b32 s8, s10
-; GFX10-NEXT: s_mov_b32 s10, s12
-; GFX10-NEXT: s_lshl_b32 s12, s0, 16
 ; GFX10-NEXT: s_mov_b32 s1, s3
+; GFX10-NEXT: s_mov_b32 s2, s4
 ; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: s_mov_b32 s4, s6
 ; GFX10-NEXT: s_mov_b32 s5, s7
+; GFX10-NEXT: s_mov_b32 s6, s8
 ; GFX10-NEXT: s_mov_b32 s7, s9
+; GFX10-NEXT: s_mov_b32 s8, s10
 ; GFX10-NEXT: s_mov_b32 s9, s11
+; GFX10-NEXT: s_mov_b32 s10, s12
 ; GFX10-NEXT: s_mov_b32 s11, s13
-; GFX10-NEXT: v_and_or_b32 v0, v0, 0xffff, s12
 ; GFX10-NEXT: ; implicit-def: $vcc_hi
 ; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll
index 9f638dbb4d7a..516e92e08b16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll
@@ -1,404 +1,277 @@
-; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=legalizer -o - %s | FileCheck -check-prefix=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GCN %s
 define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
- ; GCN-LABEL: name: sample_l_1d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[COPY12]](s32), 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_l_1d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
- ; GCN-LABEL: name: sample_l_2d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[BUILD_VECTOR2]](<2 x s32>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_l_2d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
- ; GCN-LABEL: name: sample_c_l_1d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[BUILD_VECTOR2]](<2 x s32>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_c_l_1d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
- ; GCN-LABEL: name: sample_c_l_2d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BUILD_VECTOR2]](<3 x s32>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_c_l_2d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
- ; GCN-LABEL: name: sample_l_o_1d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.o.1d), 15, [[BUILD_VECTOR2]](<2 x s32>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_l_o_1d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
- ; GCN-LABEL: name: sample_l_o_2d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.o.2d), 15, [[BUILD_VECTOR2]](<3 x s32>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_l_o_2d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
- ; GCN-LABEL: name: sample_c_l_o_1d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.o.1d), 15, [[BUILD_VECTOR2]](<3 x s32>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_c_l_o_1d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
- ; GCN-LABEL: name: sample_c_l_o_2d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.o.2d), 15, [[BUILD_VECTOR2]](<4 x s32>), $noreg, $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: sample_c_l_o_2d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
- ; GCN-LABEL: name: gather4_l_2d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.gather4.l.2d), 15, [[BUILD_VECTOR2]](<2 x s32>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: gather4_l_2d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
- ; GCN-LABEL: name: gather4_c_l_2d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.gather4.c.l.2d), 15, [[BUILD_VECTOR2]](<3 x s32>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
- ; GCN: $vgpr0 = COPY [[UV]](s32)
- ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: $vgpr2 = COPY [[UV2]](s32)
- ; GCN: $vgpr3 = COPY [[UV3]](s32)
- ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+; GCN-LABEL: gather4_c_l_2d:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s4
+; GCN-NEXT: s_mov_b32 s3, s5
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s7
+; GCN-NEXT: s_mov_b32 s6, s8
+; GCN-NEXT: s_mov_b32 s7, s9
+; GCN-NEXT: s_mov_b32 s8, s10
+; GCN-NEXT: s_mov_b32 s9, s11
+; GCN-NEXT: s_mov_b32 s10, s12
+; GCN-NEXT: s_mov_b32 s11, s13
+; GCN-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: ; return to shader part epilog
 main_body:
 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
 ret <4 x float> %v
 }
 define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
- ; GCN-LABEL: name: gather4_l_o_2d
- ; GCN: bb.1.main_body:
- ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
- ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
- ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
- ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
- ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
- ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
- 
; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.gather4.l.o.2d), 15, [[BUILD_VECTOR2]](<3 x s32>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8") - ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN: $vgpr0 = COPY [[UV]](s32) - ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: $vgpr2 = COPY [[UV2]](s32) - ; GCN: $vgpr3 = COPY [[UV3]](s32) - ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +; GCN-LABEL: gather4_l_o_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) ret <4 x float> %v } define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { - ; GCN-LABEL: name: gather4_c_l_o_2d - ; GCN: bb.1.main_body: - ; GCN: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 - ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 - ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GCN: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.gather4.c.l.o.2d), 15, [[BUILD_VECTOR2]](<4 x s32>), $noreg, $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8") - ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN: $vgpr0 = COPY [[UV]](s32) - ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: $vgpr2 = COPY [[UV2]](s32) - ; GCN: $vgpr3 = COPY [[UV3]](s32) - ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +; GCN-LABEL: gather4_c_l_o_2d: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: s_mov_b32 s1, s3 +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s7 +; GCN-NEXT: s_mov_b32 s6, s8 +; GCN-NEXT: s_mov_b32 s7, s9 +; GCN-NEXT: s_mov_b32 s8, s10 +; GCN-NEXT: s_mov_b32 s9, s11 +; GCN-NEXT: s_mov_b32 s10, s12 +; GCN-NEXT: s_mov_b32 s11, s13 +; GCN-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) ret <4 x float> %v diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll index 900521664e0c..db5fb63f4e76 100644 --- a/llvm/test/CodeGen/NVPTX/fast-math.ll +++ b/llvm/test/CodeGen/NVPTX/fast-math.ll @@ -13,7 +13,7 @@ define float @sqrt_div(float %a, float %b) { } ; CHECK-LABEL: sqrt_div_fast( -; CHECK: sqrt.approx.f32 +; CHECK: sqrt.rn.f32 ; CHECK: div.approx.f32 define float @sqrt_div_fast(float %a, float %b) #0 { %t1 = tail call float @llvm.sqrt.f32(float %a) @@ -21,6 +21,15 @@ define float @sqrt_div_fast(float %a, float %b) #0 { ret float %t2 } +; CHECK-LABEL: sqrt_div_fast_ninf( +; CHECK: sqrt.approx.f32 +; CHECK: div.approx.f32 +define float @sqrt_div_fast_ninf(float %a, float %b) #0 { + %t1 = tail call ninf float @llvm.sqrt.f32(float %a) + %t2 = fdiv float %t1, %b + ret float %t2 +} + ; CHECK-LABEL: sqrt_div_ftz( ; CHECK: sqrt.rn.ftz.f32 ; CHECK: div.rn.ftz.f32 @@ -31,7 +40,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 { } ; CHECK-LABEL: sqrt_div_fast_ftz( -; CHECK: sqrt.approx.ftz.f32 +; CHECK: sqrt.rn.ftz.f32 ; CHECK: div.approx.ftz.f32 define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 { %t1 = tail call float @llvm.sqrt.f32(float %a) @@ -39,12 +48,20 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 { ret float %t2 } +; CHECK-LABEL: sqrt_div_fast_ftz_ninf( +; CHECK: sqrt.approx.ftz.f32 +; CHECK: div.approx.ftz.f32 +define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 { + %t1 = tail call ninf float @llvm.sqrt.f32(float %a) + %t2 = fdiv float %t1, %b + ret float %t2 +} + ; There are no fast-math or ftz versions of sqrt and div for f64. 
With ninf we use ; reciprocal(rsqrt(x)) for sqrt(x), and in all cases emit a vanilla divide. ; CHECK-LABEL: sqrt_div_fast_ftz_f64( -; CHECK: rsqrt.approx.f64 -; CHECK: rcp.approx.ftz.f64 +; CHECK: sqrt.rn.f64 ; CHECK: div.rn.f64 define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 { %t1 = tail call double @llvm.sqrt.f64(double %a) @@ -52,6 +69,16 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 { ret double %t2 } +; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf( +; CHECK: rsqrt.approx.f64 +; CHECK: rcp.approx.ftz.f64 +; CHECK: div.rn.f64 +define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 { + %t1 = tail call ninf double @llvm.sqrt.f64(double %a) + %t2 = fdiv double %t1, %b + ret double %t2 +} + ; CHECK-LABEL: rsqrt( ; CHECK-NOT: rsqrt.approx ; CHECK: sqrt.rn.f32 diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll index a8590b7c43ab..465b696c7610 100644 --- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll +++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll @@ -45,35 +45,63 @@ define double @test_rsqrt64_ftz(double %a) #0 #1 { ; CHECK-LABEL: test_sqrt32 define float @test_sqrt32(float %a) #0 { -; CHECK: sqrt.approx.f32 +; CHECK: sqrt.rn.f32 %ret = tail call float @llvm.sqrt.f32(float %a) ret float %ret } +; CHECK-LABEL: test_sqrt32_ninf +define float @test_sqrt32_ninf(float %a) #0 { +; CHECK: sqrt.approx.f32 + %ret = tail call ninf float @llvm.sqrt.f32(float %a) + ret float %ret +} + ; CHECK-LABEL: test_sqrt_ftz define float @test_sqrt_ftz(float %a) #0 #1 { -; CHECK: sqrt.approx.ftz.f32 +; CHECK: sqrt.rn.ftz.f32 %ret = tail call float @llvm.sqrt.f32(float %a) ret float %ret } +; CHECK-LABEL: test_sqrt_ftz_ninf +define float @test_sqrt_ftz_ninf(float %a) #0 #1 { +; CHECK: sqrt.approx.ftz.f32 + %ret = tail call ninf float @llvm.sqrt.f32(float %a) + ret float %ret +} + ; CHECK-LABEL: test_sqrt64 define double @test_sqrt64(double %a) #0 { +; CHECK: sqrt.rn.f64 + %ret = tail call double @llvm.sqrt.f64(double %a) + ret double %ret +} + +; CHECK-LABEL: test_sqrt64_ninf +define double @test_sqrt64_ninf(double %a) #0 { ; There's no sqrt.approx.f64 instruction; we emit ; reciprocal(rsqrt.approx.f64(x)). There's no non-ftz approximate reciprocal, ; so we just use the ftz version. ; CHECK: rsqrt.approx.f64 ; CHECK: rcp.approx.ftz.f64 - %ret = tail call double @llvm.sqrt.f64(double %a) + %ret = tail call ninf double @llvm.sqrt.f64(double %a) ret double %ret } ; CHECK-LABEL: test_sqrt64_ftz define double @test_sqrt64_ftz(double %a) #0 #1 { +; CHECK: sqrt.rn.f64 + %ret = tail call double @llvm.sqrt.f64(double %a) + ret double %ret +} + +; CHECK-LABEL: test_sqrt64_ftz_ninf +define double @test_sqrt64_ftz_ninf(double %a) #0 #1 { ; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64 ; CHECK: rcp.approx.ftz.f64 - %ret = tail call double @llvm.sqrt.f64(double %a) + %ret = tail call ninf double @llvm.sqrt.f64(double %a) ret double %ret } @@ -92,11 +120,18 @@ define float @test_rsqrt32_refined(float %a) #0 #2 { ; CHECK-LABEL: test_sqrt32_refined define float @test_sqrt32_refined(float %a) #0 #2 { -; CHECK: rsqrt.approx.f32 +; CHECK: sqrt.rn.f32 %ret = tail call float @llvm.sqrt.f32(float %a) ret float %ret } +; CHECK-LABEL: test_sqrt32_refined_ninf +define float @test_sqrt32_refined_ninf(float %a) #0 #2 { +; CHECK: rsqrt.approx.f32 + %ret = tail call ninf float @llvm.sqrt.f32(float %a) + ret float %ret +} + ; CHECK-LABEL: test_rsqrt64_refined define double @test_rsqrt64_refined(double %a) #0 #2 { ; CHECK: rsqrt.approx.f64 @@ -107,11 +142,18 @@ define double @test_rsqrt64_refined(double %a) #0 #2 { ; CHECK-LABEL: test_sqrt64_refined define double @test_sqrt64_refined(double %a) #0 #2 { -; CHECK: rsqrt.approx.f64 +; CHECK: sqrt.rn.f64 %ret = tail call double @llvm.sqrt.f64(double %a) ret double %ret } +; CHECK-LABEL: test_sqrt64_refined_ninf +define double @test_sqrt64_refined_ninf(double %a) #0 #2 { +; CHECK: rsqrt.approx.f64 + %ret = tail call ninf double @llvm.sqrt.f64(double %a) + ret double %ret +} + ; -- refined sqrt and rsqrt with ftz enabled -- ; CHECK-LABEL: test_rsqrt32_refined_ftz @@ -124,11 +166,18 @@ define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 { ; CHECK-LABEL: test_sqrt32_refined_ftz define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 { -; CHECK: rsqrt.approx.ftz.f32 +; CHECK: sqrt.rn.ftz.f32 %ret = tail call float @llvm.sqrt.f32(float %a) ret float %ret } +; CHECK-LABEL: test_sqrt32_refined_ftz_ninf +define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 { +; CHECK: rsqrt.approx.ftz.f32 + %ret = tail call ninf float @llvm.sqrt.f32(float %a) + ret float %ret +} + ; CHECK-LABEL: test_rsqrt64_refined_ftz define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 { ; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version. 
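The rule these updated checks encode: with "unsafe-fp-math" alone, llvm.sqrt now lowers to the correctly rounded sqrt.rn form, and the approximate instruction is used only when the call itself carries the ninf flag. A minimal standalone sketch of the two cases (hypothetical file, mirroring the tests above; the RUN line is assumed to be the same llc NVPTX invocation this test file already uses):

; Without ninf, "unsafe-fp-math" is no longer enough for the approximation.
define float @sqrt_unsafe_only(float %a) #0 {
; expected: sqrt.rn.f32
  %r = tail call float @llvm.sqrt.f32(float %a)
  ret float %r
}

; The ninf flag on the call opts back in to the approximate instruction.
define float @sqrt_unsafe_ninf(float %a) #0 {
; expected: sqrt.approx.f32
  %r = tail call ninf float @llvm.sqrt.f32(float %a)
  ret float %r
}

declare float @llvm.sqrt.f32(float)

attributes #0 = { "unsafe-fp-math" = "true" }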
@@ -140,11 +189,18 @@ define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 { ; CHECK-LABEL: test_sqrt64_refined_ftz define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 { -; CHECK: rsqrt.approx.f64 +; CHECK: sqrt.rn.f64 %ret = tail call double @llvm.sqrt.f64(double %a) ret double %ret } +; CHECK-LABEL: test_sqrt64_refined_ftz_ninf +define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 { +; CHECK: rsqrt.approx.f64 + %ret = tail call ninf double @llvm.sqrt.f64(double %a) + ret double %ret +} + attributes #0 = { "unsafe-fp-math" = "true" } attributes #1 = { "denormal-fp-math-f32" = "preserve-sign,preserve-sign" } attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" } diff --git a/llvm/test/CodeGen/PowerPC/aix-return55.ll b/llvm/test/CodeGen/PowerPC/aix-return55.ll index a09857169a30..a18a211b46b2 100644 --- a/llvm/test/CodeGen/PowerPC/aix-return55.ll +++ b/llvm/test/CodeGen/PowerPC/aix-return55.ll @@ -31,7 +31,7 @@ entry: ;CHECKOBJ-NEXT: 18: 00 01 23 45 ;CHECKOBJ-NEXT: 1c: 67 8a bc de oris 10, 28, 48350{{[[:space:]] *}} ;CHECKOBJ-NEXT: 00000020 : -;CHECKOBJ-NEXT: 20: 40 14 00 00 bdnzf 20, .+0 +;CHECKOBJ-NEXT: 20: 40 14 00 00 bdnzf 20, 0x20 ;CHECKOBJ-NEXT: 24: 00 00 00 00 {{[[:space:]] *}} ;CHECKOBJ-NEXT: 00000028 : ;CHECKOBJ-NEXT: 28: 00 00 00 00 diff --git a/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir b/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir index 2ec09b0fb26b..56ddb2dc033b 100644 --- a/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir +++ b/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir @@ -70,12 +70,12 @@ body: | ... # Check for the long branch. -# CHECK-LE: 08 00 82 4{{[01]}} b{{[tf]}} 2, .+8 +# CHECK-LE: 08 00 82 4{{[01]}} b{{[tf]}} 2, 0xc # CHECK-LE-NEXT: fc 7f 00 48 b .+32764 # CHECK-LE-DAG: paddi 3, 3, 13, 0 # CHECK-LE-DAG: paddi 3, 3, 21, 0 # CHECK-LE: blr -# CHECK-BE: 4{{[01]}} 82 00 08 b{{[tf]}} 2, .+8 +# CHECK-BE: 4{{[01]}} 82 00 08 b{{[tf]}} 2, 0xc # CHECK-BE-NEXT: 48 00 7f fc b .+32764 # CHECK-BE-DAG: paddi 3, 3, 13, 0 # CHECK-BE-DAG: paddi 3, 3, 21, 0 diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index 222583638d59..59a7d233c0c3 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -270,11 +270,11 @@ define float @fmul_fma_fast2(float %x) { ; Reduced precision for sqrt is allowed - should use estimate and NR iterations. 
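The PowerPC hunks below apply the same gating: the estimate-plus-Newton-Raphson expansion (xsrsqrtesp followed by xsmulsp/xsmaddasp refinement, guarded by a compare against zero) is now produced only when the call carries ninf in addition to afn, while a bare afn call falls back to the full-precision xssqrtsp. That is also why the DAG dumps now print "fmul ninf afn" where they previously printed "fmul afn". A minimal sketch of the contrast (hypothetical function names; attributes elided, since the _ieee and _preserve_sign variants in this file differ only in their denormal mode, which affects the zero guard around the estimate):

define float @sqrt_estimate(float %x) {
; afn ninf: expect xsrsqrtesp plus refinement instead of a hardware sqrt
  %r = call afn ninf float @llvm.sqrt.f32(float %x)
  ret float %r
}

define float @sqrt_full_precision(float %x) {
; afn alone: expect a single xssqrtsp
  %r = call afn float @llvm.sqrt.f32(float %x)
  ret float %r
}

declare float @llvm.sqrt.f32(float)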
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:' -; FMFDEBUG: fmul afn {{t[0-9]+}} +; FMFDEBUG: fmul ninf afn {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:' -; GLOBALDEBUG: fmul afn {{t[0-9]+}} +; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:' define float @sqrt_afn_ieee(float %x) #0 { @@ -321,17 +321,31 @@ define float @sqrt_afn_ieee(float %x) #0 { ; GLOBAL-NEXT: xsmulsp 0, 0, 2 ; GLOBAL-NEXT: .LBB10_2: ; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: blr + %rt = call afn ninf float @llvm.sqrt.f32(float %x) + ret float %rt +} + +define float @sqrt_afn_ieee_inf(float %x) #0 { +; FMF-LABEL: sqrt_afn_ieee_inf: +; FMF: # %bb.0: +; FMF-NEXT: xssqrtsp 1, 1 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: sqrt_afn_ieee_inf: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xssqrtsp 1, 1 ; GLOBAL-NEXT: blr %rt = call afn float @llvm.sqrt.f32(float %x) ret float %rt } ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:' -; FMFDEBUG: fmul afn {{t[0-9]+}} +; FMFDEBUG: fmul ninf afn {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:' -; GLOBALDEBUG: fmul afn {{t[0-9]+}} +; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:' define float @sqrt_afn_preserve_sign(float %x) #1 { @@ -339,19 +353,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB11_2 +; FMF-NEXT: beq 0, .LBB12_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI12_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI12_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI12_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: xsmulsp 1, 1, 2 ; FMF-NEXT: xsaddsp 0, 0, 3 ; FMF-NEXT: xsmulsp 0, 1, 0 -; FMF-NEXT: .LBB11_2: +; FMF-NEXT: .LBB12_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; @@ -359,19 +373,33 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB11_2 +; GLOBAL-NEXT: beq 0, .LBB12_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI12_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI12_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI12_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: +; GLOBAL-NEXT: .LBB12_2: ; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: blr + %rt = call afn ninf float @llvm.sqrt.f32(float %x) + ret float %rt +} + +define float @sqrt_afn_preserve_sign_inf(float %x) #1 { +; FMF-LABEL: sqrt_afn_preserve_sign_inf: +; FMF: # %bb.0: +; FMF-NEXT: xssqrtsp 1, 1 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xssqrtsp 1, 1 ; GLOBAL-NEXT: blr %rt = 
call afn float @llvm.sqrt.f32(float %x) ret float %rt @@ -390,45 +418,45 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { define float @sqrt_fast_ieee(float %x) #0 { ; FMF-LABEL: sqrt_fast_ieee: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI12_2@toc@ha +; FMF-NEXT: addis 3, 2, .LCPI14_2@toc@ha ; FMF-NEXT: fabs 0, 1 -; FMF-NEXT: lfs 2, .LCPI12_2@toc@l(3) +; FMF-NEXT: lfs 2, .LCPI14_2@toc@l(3) ; FMF-NEXT: fcmpu 0, 0, 2 ; FMF-NEXT: xxlxor 0, 0, 0 -; FMF-NEXT: blt 0, .LBB12_2 +; FMF-NEXT: blt 0, .LBB14_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI12_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI12_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI12_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI14_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI14_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI14_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI14_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmaddasp 2, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 3 ; FMF-NEXT: xsmulsp 0, 0, 2 -; FMF-NEXT: .LBB12_2: +; FMF-NEXT: .LBB14_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: sqrt_fast_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI12_2@toc@ha +; GLOBAL-NEXT: addis 3, 2, .LCPI14_2@toc@ha ; GLOBAL-NEXT: fabs 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI12_2@toc@l(3) +; GLOBAL-NEXT: lfs 2, .LCPI14_2@toc@l(3) ; GLOBAL-NEXT: fcmpu 0, 0, 2 ; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB12_2 +; GLOBAL-NEXT: blt 0, .LBB14_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI12_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI12_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, .LCPI12_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI14_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI14_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI14_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI14_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB12_2: +; GLOBAL-NEXT: .LBB14_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call fast float @llvm.sqrt.f32(float %x) @@ -450,18 +478,18 @@ define float @sqrt_fast_preserve_sign(float %x) #1 { ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB13_2 +; FMF-NEXT: beq 0, .LBB15_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI13_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI13_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI13_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI15_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI15_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI15_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmaddasp 2, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 3 ; FMF-NEXT: xsmulsp 0, 0, 2 -; FMF-NEXT: .LBB13_2: +; FMF-NEXT: .LBB15_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; @@ -469,18 +497,18 @@ define float @sqrt_fast_preserve_sign(float %x) #1 { ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB13_2 +; GLOBAL-NEXT: beq 0, .LBB15_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI13_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI13_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, .LCPI13_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI15_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI15_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI15_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; 
GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB13_2: +; GLOBAL-NEXT: .LBB15_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call fast float @llvm.sqrt.f32(float %x) @@ -502,10 +530,10 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: xscmpudp 0, 1, 0 -; FMF-NEXT: blt 0, .LBB14_2 +; FMF-NEXT: blt 0, .LBB16_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: fmr 3, 2 -; FMF-NEXT: .LBB14_2: +; FMF-NEXT: .LBB16_2: ; FMF-NEXT: fmr 1, 3 ; FMF-NEXT: blr ; @@ -513,10 +541,10 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: xscmpudp 0, 1, 0 -; GLOBAL-NEXT: blt 0, .LBB14_2 +; GLOBAL-NEXT: blt 0, .LBB16_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: fmr 3, 2 -; GLOBAL-NEXT: .LBB14_2: +; GLOBAL-NEXT: .LBB16_2: ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr %cmp = fcmp nnan ult double %a, 0.0 diff --git a/llvm/test/CodeGen/PowerPC/mi-simplify-code.mir b/llvm/test/CodeGen/PowerPC/mi-simplify-code.mir new file mode 100644 index 000000000000..15c1c4e1ef7e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mi-simplify-code.mir @@ -0,0 +1,63 @@ +# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir < %s \ +# RUN: -verify-machineinstrs -start-before=ppc-mi-peepholes | FileCheck %s + +--- +name: remove_frsp +alignment: 16 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4 + + %1:g8rc = COPY $x4 + %0:g8rc_and_g8rc_nox0 = COPY $x3 + %2:g8rc = RLDICR %1, 2, 61 + %3:f8rc, %4:g8rc_and_g8rc_nox0 = LFSUX %0, killed %2 + %5:f4rc = FRSP killed %3, implicit $rm + %22:vslrc = SUBREG_TO_REG 1, %5, %subreg.sub_64 + %7:g8rc = LI8 8 + %8:vssrc = XFLOADf32 %4, killed %7 + %23:vslrc = SUBREG_TO_REG 1, %8, %subreg.sub_64 + %10:vsrc = XXPERMDI %23, %22, 0 + %11:vrrc = XVCVDPSP killed %10, implicit $rm + $v2 = COPY %11 + BLR8 implicit $lr8, implicit $rm, implicit $v2 +... +# CHECK-LABEL: remove_frsp +# CHECK: sldi 4, 4, 2 +# CHECK-NEXT: lfsux 0, 3, 4 +# CHECK-NOT: frsp +# CHECK-NEXT: lfs 1, 8(3) +# CHECK-NEXT: xxmrghd 0, 1, 0 +# CHECK-NEXT: xvcvdpsp 34, 0 +# CHECK-NEXT: blr + +--- +name: remove_xsrsp +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4 + + %1:g8rc = COPY $x4 + %0:g8rc_and_g8rc_nox0 = COPY $x3 + %2:g8rc = RLDICR %1, 2, 61 + %3:f8rc, %4:g8rc_and_g8rc_nox0 = LFSUX %0, killed %2 + %5:vssrc = XSRSP killed %3 + %22:vslrc = SUBREG_TO_REG 1, %5, %subreg.sub_64 + %7:g8rc = LI8 8 + %8:vssrc = XFLOADf32 %4, killed %7 + %23:vslrc = SUBREG_TO_REG 1, %8, %subreg.sub_64 + %10:vsrc = XXPERMDI %23, %22, 0 + %11:vrrc = XVCVDPSP killed %10, implicit $rm + $v2 = COPY %11 + BLR8 implicit $lr8, implicit $rm, implicit $v2 +... 
+# CHECK-LABEL: remove_xsrsp +# CHECK: sldi 4, 4, 2 +# CHECK-NEXT: lfsux 0, 3, 4 +# CHECK-NEXT: xsrsp 0, 0 +# CHECK-NEXT: lfs 1, 8(3) +# CHECK-NEXT: xxmrghd 0, 1, 0 +# CHECK-NEXT: xvcvdpsp 34, 0 +# CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/pr45297.ll b/llvm/test/CodeGen/PowerPC/pr45297.ll index 5bd5df543950..39583d5a04cc 100644 --- a/llvm/test/CodeGen/PowerPC/pr45297.ll +++ b/llvm/test/CodeGen/PowerPC/pr45297.ll @@ -1,11 +1,20 @@ -; RUN: not --crash llc -verify-machineinstrs \ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec \ ; RUN: -mattr=-power8-vector -mattr=-vsx < %s 2>&1 | FileCheck %s -; CHECK: LLVM ERROR: Cannot select: {{.*}}: ch = PPCISD::ST_VSR_SCAL_INT<(store 4 into @Global)> @Global = dso_local global i32 55, align 4 define dso_local void @test(float %0) local_unnamed_addr { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fctiwz f0, f1 +; CHECK-NEXT: addi r3, r1, -4 +; CHECK-NEXT: addis r4, r2, Global@toc@ha +; CHECK-NEXT: stfiwx f0, 0, r3 +; CHECK-NEXT: lwz r3, -4(r1) +; CHECK-NEXT: stw r3, Global@toc@l(r4) +; CHECK-NEXT: blr entry: %1 = fptosi float %0 to i32 store i32 %1, i32* @Global, align 4 diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index 43da05ebe7c7..f2691ba1a771 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -1628,6 +1628,7 @@ define void @cmpxchg_i32_monotonic_monotonic(i32* %ptr, i32 %cmp, i32 %val) noun ; ; RV64IA-LABEL: cmpxchg_i32_monotonic_monotonic: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB20_3 @@ -1680,6 +1681,7 @@ define void @cmpxchg_i32_acquire_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwi ; ; RV64IA-LABEL: cmpxchg_i32_acquire_monotonic: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB21_3 @@ -1732,6 +1734,7 @@ define void @cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind ; ; RV64IA-LABEL: cmpxchg_i32_acquire_acquire: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB22_3 @@ -1784,6 +1787,7 @@ define void @cmpxchg_i32_release_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwi ; ; RV64IA-LABEL: cmpxchg_i32_release_monotonic: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB23_3 @@ -1836,6 +1840,7 @@ define void @cmpxchg_i32_release_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind ; ; RV64IA-LABEL: cmpxchg_i32_release_acquire: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB24_3 @@ -1888,6 +1893,7 @@ define void @cmpxchg_i32_acq_rel_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwi ; ; RV64IA-LABEL: cmpxchg_i32_acq_rel_monotonic: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB25_3 @@ -1940,6 +1946,7 @@ define void 
@cmpxchg_i32_acq_rel_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind ; ; RV64IA-LABEL: cmpxchg_i32_acq_rel_acquire: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB26_3 @@ -1992,6 +1999,7 @@ define void @cmpxchg_i32_seq_cst_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwi ; ; RV64IA-LABEL: cmpxchg_i32_seq_cst_monotonic: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB27_3 @@ -2044,6 +2052,7 @@ define void @cmpxchg_i32_seq_cst_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind ; ; RV64IA-LABEL: cmpxchg_i32_seq_cst_acquire: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB28_3 @@ -2096,6 +2105,7 @@ define void @cmpxchg_i32_seq_cst_seq_cst(i32* %ptr, i32 %cmp, i32 %val) nounwind ; ; RV64IA-LABEL: cmpxchg_i32_seq_cst_seq_cst: ; RV64IA: # %bb.0: +; RV64IA-NEXT: sext.w a1, a1 ; RV64IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a3, (a0) ; RV64IA-NEXT: bne a3, a1, .LBB29_3 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll index bc94f8ba9187..70fc0e4ab1cb 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll @@ -383,11 +383,9 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vpnot_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: vpnot_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vpt.s32 lt, q0, zr +; CHECK-NEXT: vpte.s32 lt, q0, zr ; CHECK-NEXT: vcmpt.s32 gt, q1, zr -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vcmpt.i32 eq, q2, zr +; CHECK-NEXT: vcmpe.i32 eq, q2, zr ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr entry: @@ -400,3 +398,73 @@ entry: %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b ret <4 x i32> %s } + +declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: vpttet_v4i1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vpttet.s32 ge, q0, q2 +; CHECK-NEXT: vmaxt.s32 q3, q0, q1 +; CHECK-NEXT: vcmpt.s32 gt, q0, zr +; CHECK-NEXT: vcmpe.s32 gt, q1, zr +; CHECK-NEXT: vmovt q3, q2 +; CHECK-NEXT: vmov q0, q3 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %x, %z + %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z) + %2 = icmp sgt <4 x i32> %x, zeroinitializer + %3 = and <4 x i1> %0, %2 + %4 = xor <4 x i1> %3, + %5 = icmp sgt <4 x i32> %y, zeroinitializer + %6 = and <4 x i1> %5, %4 + %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1) + ret <4 x i32> %7 +} + +define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: vpttee_v4i1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vpttee.s32 ge, q0, q2 +; CHECK-NEXT: vmaxt.s32 q3, q0, q1 +; CHECK-NEXT: vcmpt.s32 gt, q0, zr +; CHECK-NEXT: vmove q3, q2 +; CHECK-NEXT: vmove q3, q2 +; CHECK-NEXT: vmov q0, q3 +; CHECK-NEXT: bx lr +entry: + 
%0 = icmp sge <4 x i32> %x, %z + %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z) + %2 = icmp sgt <4 x i32> %x, zeroinitializer + %3 = and <4 x i1> %0, %2 + %4 = xor <4 x i1> %3, + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1) + %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5) + ret <4 x i32> %6 +} + +define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: vpttte_v4i1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov q3, q2 +; CHECK-NEXT: vpttte.s32 ge, q0, q2 +; CHECK-NEXT: vmaxt.s32 q3, q0, q1 +; CHECK-NEXT: vcmpt.s32 gt, q0, zr +; CHECK-NEXT: vmovt q3, q2 +; CHECK-NEXT: vmove q3, q2 +; CHECK-NEXT: vmov q0, q3 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %x, %z + %1 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, i32 0, <4 x i1> %0, <4 x i32> %z) + %2 = icmp sgt <4 x i32> %x, zeroinitializer + %3 = and <4 x i1> %0, %2 + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %1) + %5 = xor <4 x i1> %3, + %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4) + ret <4 x i32> %6 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll index ce82ba0909af..df211f1efebc 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll @@ -19,11 +19,9 @@ define arm_aapcs_vfpcc void @thres_i32(i32* %data, i16 zeroext %N, i32 %T) { ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vpt.s32 ge, q1, r2 +; CHECK-NEXT: vpte.s32 ge, q1, r2 ; CHECK-NEXT: vcmpt.s32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -77,11 +75,9 @@ define arm_aapcs_vfpcc void @thresh_i16(i16* %data, i16 zeroext %N, i16 signext ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.s16 ge, q1, r2 +; CHECK-NEXT: vpte.s16 ge, q1, r2 ; CHECK-NEXT: vcmpt.s16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB1_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -135,11 +131,9 @@ define arm_aapcs_vfpcc void @thresh_i8(i8* %data, i16 zeroext %N, i8 signext %T) ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vpt.s8 ge, q1, r2 +; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r0], #16 +; CHECK-NEXT: vstrbe.8 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB2_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -195,11 +189,9 @@ define arm_aapcs_vfpcc void @thresh_f32(float* %data, i16 zeroext %N, float %T) ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; 
CHECK-NEXT: vpt.f32 ge, q1, r2 +; CHECK-NEXT: vpte.f32 ge, q1, r2 ; CHECK-NEXT: vcmpt.f32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -255,11 +247,9 @@ define arm_aapcs_vfpcc void @thresh_f16(half* %data, i16 zeroext %N, float %T.co ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.f16 ge, q1, r2 +; CHECK-NEXT: vpte.f16 ge, q1, r2 ; CHECK-NEXT: vcmpt.f16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB4_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -318,11 +308,9 @@ define arm_aapcs_vfpcc void @thres_rev_i32(i32* %data, i16 zeroext %N, i32 %T) { ; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vpt.s32 ge, q1, r2 +; CHECK-NEXT: vpte.s32 ge, q1, r2 ; CHECK-NEXT: vcmpt.s32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB5_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -376,11 +364,9 @@ define arm_aapcs_vfpcc void @thresh_rev_i16(i16* %data, i16 zeroext %N, i16 sign ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.s16 ge, q1, r2 +; CHECK-NEXT: vpte.s16 ge, q1, r2 ; CHECK-NEXT: vcmpt.s16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -434,11 +420,9 @@ define arm_aapcs_vfpcc void @thresh_rev_i8(i8* %data, i16 zeroext %N, i8 signext ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u8 q1, [r0] -; CHECK-NEXT: vpt.s8 ge, q1, r2 +; CHECK-NEXT: vpte.s8 ge, q1, r2 ; CHECK-NEXT: vcmpt.s8 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.8 q0, [r0], #16 +; CHECK-NEXT: vstrbe.8 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -494,11 +478,9 @@ define arm_aapcs_vfpcc void @thresh_rev_f32(float* %data, i16 zeroext %N, float ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vpt.f32 ge, q1, r2 +; CHECK-NEXT: vpte.f32 ge, q1, r2 ; CHECK-NEXT: vcmpt.f32 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 +; CHECK-NEXT: vstrwe.32 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -554,11 +536,9 @@ define arm_aapcs_vfpcc void @thresh_rev_f16(half* %data, i16 zeroext %N, float % ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q1, [r0] -; CHECK-NEXT: vpt.f16 ge, q1, r2 +; CHECK-NEXT: vpte.f16 ge, q1, r2 ; CHECK-NEXT: vcmpt.f16 le, q1, r1 -; CHECK-NEXT: vpnot -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 +; CHECK-NEXT: vstrhe.16 q0, [r0], #16 ; CHECK-NEXT: le lr, .LBB9_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup 
; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll new file mode 100644 index 000000000000..c32abb24dd87 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll @@ -0,0 +1,221 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK + +define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32* nocapture %z, i32 %n) { +; CHECK-LABEL: test32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r5, lr} +; CHECK-NEXT: push {r5, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB0_2 +; CHECK-NEXT: .LBB0_1: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q2, [r0], #16 +; CHECK-NEXT: vldrw.u32 q3, [r1], #16 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vrev64.32 q1, q2 +; CHECK-NEXT: vrev64.32 q4, q3 +; CHECK-NEXT: vmov r12, s4 +; CHECK-NEXT: vmov lr, s16 +; CHECK-NEXT: smull r12, r5, lr, r12 +; CHECK-NEXT: lsrl r12, r5, #31 +; CHECK-NEXT: vmov.32 q0[0], r12 +; CHECK-NEXT: vmov r12, s6 +; CHECK-NEXT: vmov.32 q0[1], r5 +; CHECK-NEXT: vmov r5, s18 +; CHECK-NEXT: smull r12, r5, r5, r12 +; CHECK-NEXT: lsrl r12, r5, #31 +; CHECK-NEXT: vmov.32 q0[2], r12 +; CHECK-NEXT: vmov r12, s8 +; CHECK-NEXT: vmov.32 q0[3], r5 +; CHECK-NEXT: vmov r5, s12 +; CHECK-NEXT: smull r12, r5, r5, r12 +; CHECK-NEXT: lsrl r12, r5, #31 +; CHECK-NEXT: vmov.32 q1[0], r12 +; CHECK-NEXT: vmov r12, s10 +; CHECK-NEXT: vmov.32 q1[1], r5 +; CHECK-NEXT: vmov r5, s14 +; CHECK-NEXT: smull r12, r5, r5, r12 +; CHECK-NEXT: lsrl r12, r5, #31 +; CHECK-NEXT: vmov.32 q1[2], r12 +; CHECK-NEXT: vmov.32 q1[3], r5 +; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s6, s0 +; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s5, s6 +; CHECK-NEXT: vmov.f32 s11, s3 +; CHECK-NEXT: vmov.f32 s6, s8 +; CHECK-NEXT: vmov.f32 s7, s10 +; CHECK-NEXT: vstrb.8 q1, [r2], #16 +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r5, pc} +entry: + %0 = and i32 %n, 3 + %cmp = icmp eq i32 %0, 0 + %cmp113 = icmp sgt i32 %n, 0 + br i1 %cmp113, label %vector.body, label %for.cond.cleanup + +vector.body: ; preds = %vector.body, %entry + %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ] + %1 = getelementptr inbounds i32, i32* %x, i32 %index + %2 = bitcast i32* %1 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %2, align 4 + %3 = shufflevector <4 x i32> %wide.load, <4 x i32> %wide.load, <2 x i32> + %4 = shufflevector <4 x i32> %wide.load, <4 x i32> %wide.load, <2 x i32> + %5 = sext <2 x i32> %3 to <2 x i64> + %6 = sext <2 x i32> %4 to <2 x i64> + %7 = getelementptr inbounds i32, i32* %y, i32 %index + %8 = bitcast i32* %7 to <4 x i32>* + %wide.load15 = load <4 x i32>, <4 x i32>* %8, align 4 + %9 = shufflevector <4 x i32> %wide.load15, <4 x i32> %wide.load15, <2 x i32> + %10 = shufflevector <4 x i32> %wide.load15, <4 x i32> %wide.load15, <2 x i32> + %11 = sext <2 x i32> %9 to <2 x i64> + %12 = sext <2 x i32> %10 to <2 x i64> + %13 = mul <2 x i64> %11, %5 + %14 = mul <2 x i64> %12, %6 + %15 = lshr <2 x i64> %13, + %16 = lshr <2 x i64> %14, + %17 = shufflevector <2 x i64> %15, <2 x i64> %16, <4 x i32> + %18 = trunc <4 x i64> %17 to <4 x i32> 
+ %19 = getelementptr inbounds i32, i32* %z, i32 %index + %20 = bitcast i32* %19 to <4 x i32>* + store <4 x i32> %18, <4 x i32>* %20, align 4 + %index.next = add i32 %index, 4 + %21 = icmp eq i32 %index.next, %n + br i1 %21, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + +define arm_aapcs_vfpcc void @test16(i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y, i16* nocapture %z, i32 %n) { +; CHECK-LABEL: test16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrh.u16 q0, [r0], #16 +; CHECK-NEXT: vldrh.u16 q2, [r1], #16 +; CHECK-NEXT: subs r3, #8 +; CHECK-NEXT: vmovlt.s16 q1, q0 +; CHECK-NEXT: vmovlt.s16 q3, q2 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: vmovlb.s16 q2, q2 +; CHECK-NEXT: vmul.i32 q1, q3, q1 +; CHECK-NEXT: vmul.i32 q0, q2, q0 +; CHECK-NEXT: vshr.u32 q1, q1, #15 +; CHECK-NEXT: vshr.u32 q0, q0, #15 +; CHECK-NEXT: vmovnt.i32 q0, q1 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 +; CHECK-NEXT: bne .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bx lr +entry: + %0 = and i32 %n, 7 + %cmp = icmp eq i32 %0, 0 + %cmp113 = icmp sgt i32 %n, 0 + br i1 %cmp113, label %vector.body, label %for.cond.cleanup + +vector.body: ; preds = %vector.body, %entry + %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ] + %1 = getelementptr inbounds i16, i16* %x, i32 %index + %2 = bitcast i16* %1 to <8 x i16>* + %wide.load = load <8 x i16>, <8 x i16>* %2, align 2 + %3 = shufflevector <8 x i16> %wide.load, <8 x i16> %wide.load, <4 x i32> + %4 = shufflevector <8 x i16> %wide.load, <8 x i16> %wide.load, <4 x i32> + %5 = sext <4 x i16> %3 to <4 x i32> + %6 = sext <4 x i16> %4 to <4 x i32> + %7 = getelementptr inbounds i16, i16* %y, i32 %index + %8 = bitcast i16* %7 to <8 x i16>* + %wide.load15 = load <8 x i16>, <8 x i16>* %8, align 2 + %9 = shufflevector <8 x i16> %wide.load15, <8 x i16> %wide.load15, <4 x i32> + %10 = shufflevector <8 x i16> %wide.load15, <8 x i16> %wide.load15, <4 x i32> + %11 = sext <4 x i16> %9 to <4 x i32> + %12 = sext <4 x i16> %10 to <4 x i32> + %13 = mul <4 x i32> %11, %5 + %14 = mul <4 x i32> %12, %6 + %15 = lshr <4 x i32> %13, + %16 = lshr <4 x i32> %14, + %17 = shufflevector <4 x i32> %15, <4 x i32> %16, <8 x i32> + %18 = trunc <8 x i32> %17 to <8 x i16> + %19 = getelementptr inbounds i16, i16* %z, i32 %index + %20 = bitcast i16* %19 to <8 x i16>* + store <8 x i16> %18, <8 x i16>* %20, align 2 + %index.next = add i32 %index, 8 + %21 = icmp eq i32 %index.next, %n + br i1 %21, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + +define arm_aapcs_vfpcc void @test8(i8* noalias nocapture readonly %x, i8* noalias nocapture readonly %y, i8* nocapture %z, i32 %n) { +; CHECK-LABEL: test8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB2_1: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrb.u8 q0, [r0], #16 +; CHECK-NEXT: vldrb.u8 q2, [r1], #16 +; CHECK-NEXT: subs r3, #16 +; CHECK-NEXT: vmovlt.u8 q1, q0 +; CHECK-NEXT: vmovlt.u8 q3, q2 +; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: vmovlb.u8 q2, q2 +; CHECK-NEXT: vmul.i16 q1, q3, q1 +; CHECK-NEXT: vmul.i16 q0, q2, q0 +; CHECK-NEXT: vshr.u16 q1, q1, #7 +; CHECK-NEXT: vshr.u16 q0, q0, #7 +; CHECK-NEXT: vmovnt.i16 q0, q1 +; 
CHECK-NEXT: vstrb.8 q0, [r2], #16 +; CHECK-NEXT: bne .LBB2_1 +; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: bx lr +entry: + %0 = and i32 %n, 15 + %cmp = icmp eq i32 %0, 0 + %cmp117 = icmp sgt i32 %n, 0 + br i1 %cmp117, label %vector.body, label %for.cond.cleanup + +vector.body: ; preds = %vector.body, %entry + %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ] + %1 = getelementptr inbounds i8, i8* %x, i32 %index + %2 = bitcast i8* %1 to <16 x i8>* + %wide.load = load <16 x i8>, <16 x i8>* %2, align 1 + %3 = shufflevector <16 x i8> %wide.load, <16 x i8> %wide.load, <8 x i32> + %4 = shufflevector <16 x i8> %wide.load, <16 x i8> %wide.load, <8 x i32> + %5 = zext <8 x i8> %3 to <8 x i16> + %6 = zext <8 x i8> %4 to <8 x i16> + %7 = getelementptr inbounds i8, i8* %y, i32 %index + %8 = bitcast i8* %7 to <16 x i8>* + %wide.load19 = load <16 x i8>, <16 x i8>* %8, align 1 + %9 = shufflevector <16 x i8> %wide.load19, <16 x i8> %wide.load19, <8 x i32> + %10 = shufflevector <16 x i8> %wide.load19, <16 x i8> %wide.load19, <8 x i32> + %11 = zext <8 x i8> %9 to <8 x i16> + %12 = zext <8 x i8> %10 to <8 x i16> + %13 = mul <8 x i16> %11, %5 + %14 = mul <8 x i16> %12, %6 + %15 = lshr <8 x i16> %13, + %16 = lshr <8 x i16> %14, + %17 = shufflevector <8 x i16> %15, <8 x i16> %16, <16 x i32> + %18 = trunc <16 x i16> %17 to <16 x i8> + %19 = getelementptr inbounds i8, i8* %z, i32 %index + %20 = bitcast i8* %19 to <16 x i8>* + store <16 x i8> %18, <16 x i8>* %20, align 1 + %index.next = add i32 %index, 16 + %21 = icmp eq i32 %index.next, %n + br i1 %21, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir index 347c4870ab6a..7401d771d1b6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-3-blocks-kill-vpr.mir @@ -68,14 +68,10 @@ body: | ; CHECK: liveins: $q0, $q1, $q2, $r0 ; CHECK: $vpr = VMSR_P0 killed $r0, 14 /* CC::al */, $noreg ; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 - ; CHECK: BUNDLE implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 { - ; CHECK: MVE_VPST 8, implicit $vpr + ; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $vpr, implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit killed $vpr, implicit killed $q1, implicit $q2, implicit killed $q3 { + ; CHECK: MVE_VPST 12, implicit $vpr ; CHECK: renamable $q3 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q1, renamable $q2, 1, renamable $vpr, killed renamable $q3 - ; CHECK: } - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg - ; CHECK: BUNDLE implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit $vpr, implicit killed $q3, implicit undef $q1 { - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q3, renamable $q3, 1, renamable $vpr, undef renamable $q1 + ; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q3, internal renamable $q3, 2, internal renamable $vpr, undef 
renamable $q1 ; CHECK: } ; CHECK: $q3 = MVE_VORR $q0, $q0, 0, $noreg, undef $q3 ; CHECK: BUNDLE implicit-def dead $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit killed $vpr, implicit killed $q1, implicit killed $q2, implicit killed $q3, implicit killed $q0 { diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir new file mode 100644 index 000000000000..765d3a4de831 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir @@ -0,0 +1,231 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @vpt_block_else(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2 + %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2 + %3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %3 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + +... 
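The MIR checks below exercise the new "else" slots directly: the immediate operand after MVE_VPST / MVE_VPTv4s32 is the block mask, which can now encode else slots, and an instruction in an else slot carries predicate code 2 where a then slot carries 1 (visible throughout these bundles). At the IR level, the pattern that folds into an else slot is a VPNOT, i.e. an xor of the predicate with an all-true vector, feeding further predicated work. A hedged sketch (hypothetical function, reconstructed to mirror vpnot_v4i1 from mve-pred-not.ll earlier in this patch and built with the usual llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve invocation), which the updated checks there show compiling to a single vpte.s32 block instead of vpt + vpnot + vpst:

define arm_aapcs_vfpcc <4 x i32> @vpnot_to_else(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
entry:
  %c1 = icmp slt <4 x i32> %a, zeroinitializer   ; opens the block: vpte.s32 lt, q0, zr
  %c2 = icmp sgt <4 x i32> %b, zeroinitializer   ; then slot: vcmpt.s32 gt, q1, zr
  %t = and <4 x i1> %c1, %c2
  ; This xor is the VPNOT that the pass now folds into the block mask.
  %n = xor <4 x i1> %t, <i1 true, i1 true, i1 true, i1 true>
  %c3 = icmp eq <4 x i32> %c, zeroinitializer    ; else slot: vcmpe.i32 eq, q2, zr
  %o = and <4 x i1> %n, %c3
  %s = select <4 x i1> %o, <4 x i32> %a, <4 x i32> %b ; vpsel q0, q0, q1
  ret <4 x i32> %s
}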
+--- +name: vpt_block_else +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$q3', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: vpt_block_else + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; 
CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { + ; CHECK: MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR 
renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 6, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 11, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0 + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3 + $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, killed renamable $vpr + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, renamable $q3 + $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, 
renamable $vpr, renamable $q3 + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q3 + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, killed renamable $vpr, killed renamable $q3 + $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable 
$vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+  renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+  renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
+  renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3
+  $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0
+
+  tBX_RET 14, $noreg, implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir
index b3e953964b19..8bc7a0b53598 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-vpt-nots.mir
@@ -61,14 +61,10 @@ body: |
 ; CHECK-LABEL: name: vpnot
 ; CHECK: liveins: $q0, $q1, $q2
- ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1 {
- ; CHECK: MVE_VPTv4s32r 8, renamable $q0, $zr, 11, implicit-def $vpr
+ ; CHECK: BUNDLE implicit-def $vpr, implicit $q0, implicit $zr, implicit $q1, implicit killed $q2 {
+ ; CHECK: MVE_VPTv4s32r 12, renamable $q0, $zr, 11, implicit-def $vpr
 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, internal killed renamable $vpr
- ; CHECK: }
- ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
- ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr {
- ; CHECK: MVE_VPST 8, implicit $vpr
- ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr
+ ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed renamable $vpr
 ; CHECK: }
 ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr
 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
@@ -244,14 +240,10 @@ body: |
 ; CHECK: liveins: $q0, $q1, $q2
 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q0, $zr, 11, 0, $noreg
 ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
- ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr {
- ; CHECK: MVE_VPST 8, implicit $vpr
+ ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit $q1, implicit $zr, implicit killed $q2 {
+ ; CHECK: MVE_VPST 12, implicit $vpr
 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 1, killed renamable $vpr
- ; CHECK: }
- ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
- ; CHECK: BUNDLE implicit-def $vpr, implicit killed $vpr, implicit killed $q2, implicit $zr {
- ; CHECK: MVE_VPST 8, implicit $vpr
- ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 1, killed renamable $vpr
+ ; CHECK: renamable $vpr = MVE_VCMPi32r killed renamable $q2, $zr, 0, 2, internal killed renamable $vpr
 ; CHECK: }
 ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg
 ; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr
diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll
index efdb2c6d684c..a4d8537343ca 100644
--- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll
+++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll
@@ -729,18 +729,98 @@ terminate: ; preds = %entry
   unreachable
 }
+%class.MyClass = type { i32 }
+
+; This used to crash in debug mode (i.e., when NDEBUG is not defined) because
+; the logic for computing the innermost region was incorrect when a loop
+; region contained an exception region. This should pass CFGSort without
+; crashing.
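+;
+; Region nesting that triggered the bug (a sketch reconstructed from the
+; blocks below; the loop region fully contains the exception region):
+;
+;   loop: for.cond -> for.body -> for.inc -> for.cond
+;     exception region: catch.dispatch -> catch.start -> catch / rethrow
+;                       -> ehcleanup -> terminate7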
+define void @test12() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + %e = alloca %class.MyClass, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 9 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + invoke void @quux(i32 %i.0) + to label %for.inc unwind label %catch.dispatch + +catch.dispatch: ; preds = %for.body + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* bitcast ({ i8*, i8* }* @_ZTI7MyClass to i8*)] + %2 = call i8* @llvm.wasm.get.exception(token %1) + %3 = call i32 @llvm.wasm.get.ehselector(token %1) + %4 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI7MyClass to i8*)) #3 + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %rethrow + +catch: ; preds = %catch.start + %5 = call i8* @__cxa_get_exception_ptr(i8* %2) #3 [ "funclet"(token %1) ] + %6 = bitcast i8* %5 to %class.MyClass* + %call = call %class.MyClass* @_ZN7MyClassC2ERKS_(%class.MyClass* %e, %class.MyClass* dereferenceable(4) %6) [ "funclet"(token %1) ] + %7 = call i8* @__cxa_begin_catch(i8* %2) #3 [ "funclet"(token %1) ] + %x = getelementptr inbounds %class.MyClass, %class.MyClass* %e, i32 0, i32 0 + %8 = load i32, i32* %x, align 4 + invoke void @quux(i32 %8) [ "funclet"(token %1) ] + to label %invoke.cont2 unwind label %ehcleanup + +invoke.cont2: ; preds = %catch + %call3 = call %class.MyClass* @_ZN7MyClassD2Ev(%class.MyClass* %e) #3 [ "funclet"(token %1) ] + call void @__cxa_end_catch() [ "funclet"(token %1) ] + catchret from %1 to label %for.inc + +rethrow: ; preds = %catch.start + call void @llvm.wasm.rethrow.in.catch() #6 [ "funclet"(token %1) ] + unreachable + +for.inc: ; preds = %invoke.cont2, %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +ehcleanup: ; preds = %catch + %9 = cleanuppad within %1 [] + %call4 = call %class.MyClass* @_ZN7MyClassD2Ev(%class.MyClass* %e) #3 [ "funclet"(token %9) ] + invoke void @__cxa_end_catch() [ "funclet"(token %9) ] + to label %invoke.cont6 unwind label %terminate7 + +invoke.cont6: ; preds = %ehcleanup + cleanupret from %9 unwind to caller + +for.end: ; preds = %for.cond + ret void + +terminate7: ; preds = %ehcleanup + %10 = cleanuppad within %9 [] + %11 = call i8* @llvm.wasm.get.exception(token %10) + call void @__clang_call_terminate(i8* %11) #7 [ "funclet"(token %10) ] + unreachable +} + ; Check if the unwind destination mismatch stats are correct -; NOSORT-STAT: 11 wasm-cfg-stackify - Number of EH pad unwind mismatches found +; NOSORT-STAT: 14 wasm-cfg-stackify - Number of EH pad unwind mismatches found declare void @foo() declare void @bar() declare i32 @baz() +declare void @quux(i32) declare void @fun(i32) ; Function Attrs: nounwind declare void @nothrow(i32) #0 declare i32 @nothrow_i32() #0 + ; Function Attrs: nounwind declare %class.Object* @_ZN6ObjectD2Ev(%class.Object* returned) #0 +@_ZTI7MyClass = external constant { i8*, i8* }, align 4 +; Function Attrs: nounwind +declare %class.MyClass* @_ZN7MyClassD2Ev(%class.MyClass* returned) #0 +; Function Attrs: nounwind +declare %class.MyClass* @_ZN7MyClassC2ERKS_(%class.MyClass* returned, %class.MyClass* dereferenceable(4)) #0 + declare i32 @__gxx_wasm_personality_v0(...) 
declare i8* @llvm.wasm.get.exception(token) declare i32 @llvm.wasm.get.ehselector(token) @@ -748,6 +828,7 @@ declare void @llvm.wasm.rethrow.in.catch() declare i32 @llvm.eh.typeid.for(i8*) declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() +declare i8* @__cxa_get_exception_ptr(i8*) declare void @__clang_call_terminate(i8*) declare void @_ZSt9terminatev() ; Function Attrs: nounwind diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll index cad6c4ac855b..ad42cc5f8615 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll @@ -308,6 +308,10 @@ attributes #0 = { returns_twice } attributes #1 = { noreturn } attributes #2 = { nounwind } attributes #3 = { allocsize(0) } +; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_void" } +; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__cxa_find_matching_catch_3" } +; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_i8*_i32_%struct.__jmp_buf_tag*" } +; CHECK: attributes #{{[0-9]+}} = { "wasm-import-module"="env" "wasm-import-name"="__invoke_void_%struct.__jmp_buf_tag*_i32" } ; CHECK: attributes #[[ALLOCSIZE_ATTR]] = { allocsize(1) } !llvm.dbg.cu = !{!2} diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll index afb4acd736d3..e12a4152ac7e 100644 --- a/llvm/test/CodeGen/X86/combine-ptest.ll +++ b/llvm/test/CodeGen/X86/combine-ptest.ll @@ -9,10 +9,8 @@ define i32 @ptestz_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vptest %xmm1, %xmm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: retq %t1 = xor <2 x i64> %c, %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %d) @@ -25,11 +23,8 @@ define i32 @ptestz_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vptest %ymm1, %ymm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t1 = xor <4 x i64> %c, @@ -47,10 +42,8 @@ define i32 @ptestz_128_invert1(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_128_invert1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vptest %xmm1, %xmm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: vptest %xmm0, %xmm1 +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: retq %t1 = xor <2 x i64> %d, %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %t1) @@ -63,11 +56,8 @@ define i32 @ptestz_256_invert1(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_256_invert1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vptest %ymm1, %ymm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: vptest %ymm0, %ymm1 +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t1 = xor <4 x i64> 
%d, @@ -85,10 +75,8 @@ define i32 @ptestc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestc_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vptest %xmm1, %xmm0 -; CHECK-NEXT: cmovael %esi, %eax +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = xor <2 x i64> %c, %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d) @@ -101,11 +89,8 @@ define i32 @ptestc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestc_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vptest %ymm1, %ymm0 -; CHECK-NEXT: cmovael %esi, %eax +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t1 = xor <4 x i64> %c, @@ -123,10 +108,8 @@ define i32 @ptestnzc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestnzc_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vptest %xmm1, %xmm0 -; CHECK-NEXT: cmovael %esi, %eax +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = xor <2 x i64> %c, %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d) @@ -139,9 +122,6 @@ define i32 @ptestnzc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) { ; CHECK-LABEL: ptestnzc_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vptest %ymm1, %ymm0 ; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: vzeroupper @@ -153,6 +133,21 @@ define i32 @ptestnzc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) { ret i32 %t4 } +define i32 @ptestnzc_256_invert0_commute(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) { +; CHECK-LABEL: ptestnzc_256_invert0_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: vptest %ymm1, %ymm0 +; CHECK-NEXT: cmoval %esi, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %t1 = xor <4 x i64> %c, + %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d) + %t3 = icmp eq i32 %t2, 0 + %t4 = select i1 %t3, i32 %a, i32 %b + ret i32 %t4 +} + ; ; testz(-1,X) -> testz(X,X) ; @@ -161,8 +156,7 @@ define i32 @ptestz_128_allones0(<2 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_128_allones0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vptest %xmm0, %xmm1 +; CHECK-NEXT: vptest %xmm0, %xmm0 ; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> , <2 x i64> %c) @@ -175,9 +169,7 @@ define i32 @ptestz_256_allones0(<4 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_256_allones0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 -; CHECK-NEXT: vptest %ymm0, %ymm1 +; CHECK-NEXT: vptest %ymm0, %ymm0 ; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -195,8 +187,7 @@ define i32 @ptestz_128_allones1(<2 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_128_allones1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vptest %xmm1, %xmm0 +; CHECK-NEXT: vptest %xmm0, 
%xmm0 ; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> ) @@ -209,9 +200,7 @@ define i32 @ptestz_256_allones1(<4 x i64> %c, i32 %a, i32 %b) { ; CHECK-LABEL: ptestz_256_allones1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 -; CHECK-NEXT: vptest %ymm1, %ymm0 +; CHECK-NEXT: vptest %ymm0, %ymm0 ; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -226,10 +215,8 @@ define zeroext i1 @PR38522(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK: # %bb.0: # %start ; CHECK-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vptest %xmm1, %xmm0 -; CHECK-NEXT: setb %al +; CHECK-NEXT: vptest %xmm0, %xmm0 +; CHECK-NEXT: sete %al ; CHECK-NEXT: retq start: %0 = load <16 x i8>, <16 x i8>* %x, align 16 diff --git a/llvm/test/CodeGen/X86/combine-testpd.ll b/llvm/test/CodeGen/X86/combine-testpd.ll index b43ac2a2ea0d..9ae3d80e59cd 100644 --- a/llvm/test/CodeGen/X86/combine-testpd.ll +++ b/llvm/test/CodeGen/X86/combine-testpd.ll @@ -9,10 +9,8 @@ define i32 @testpdz_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b ; CHECK-LABEL: testpdz_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: retq %t0 = bitcast <2 x double> %c to <2 x i64> %t1 = xor <2 x i64> %t0, @@ -27,11 +25,8 @@ define i32 @testpdz_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b ; CHECK-LABEL: testpdz_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t0 = bitcast <4 x double> %c to <4 x i64> @@ -51,10 +46,8 @@ define i32 @testpdz_128_invert1(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b ; CHECK-LABEL: testpdz_128_invert1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vtestpd %xmm1, %xmm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: vtestpd %xmm0, %xmm1 +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: retq %t0 = bitcast <2 x double> %d to <2 x i64> %t1 = xor <2 x i64> %t0, @@ -69,11 +62,8 @@ define i32 @testpdz_256_invert1(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b ; CHECK-LABEL: testpdz_256_invert1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vtestpd %ymm1, %ymm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: vtestpd %ymm0, %ymm1 +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t0 = bitcast <4 x double> %d to <4 x i64> @@ -93,10 +83,8 @@ define i32 @testpdc_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b ; CHECK-LABEL: testpdc_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 -; CHECK-NEXT: cmovael %esi, %eax +; 
CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t0 = bitcast <2 x double> %c to <2 x i64> %t1 = xor <2 x i64> %t0, @@ -111,11 +99,8 @@ define i32 @testpdc_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b ; CHECK-LABEL: testpdc_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 -; CHECK-NEXT: cmovael %esi, %eax +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t0 = bitcast <4 x double> %c to <4 x i64> @@ -135,8 +120,6 @@ define i32 @testpdnzc_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 ; CHECK-LABEL: testpdnzc_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vtestpd %xmm1, %xmm0 ; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: retq @@ -153,9 +136,6 @@ define i32 @testpdnzc_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 ; CHECK-LABEL: testpdnzc_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vtestpd %ymm1, %ymm0 ; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/combine-testps.ll b/llvm/test/CodeGen/X86/combine-testps.ll index 6b5e65322c64..f3605441348d 100644 --- a/llvm/test/CodeGen/X86/combine-testps.ll +++ b/llvm/test/CodeGen/X86/combine-testps.ll @@ -9,10 +9,8 @@ define i32 @testpsz_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) ; CHECK-LABEL: testpsz_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vtestps %xmm1, %xmm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: retq %t0 = bitcast <4 x float> %c to <2 x i64> %t1 = xor <2 x i64> %t0, @@ -27,11 +25,8 @@ define i32 @testpsz_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) ; CHECK-LABEL: testpsz_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vtestps %ymm1, %ymm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t0 = bitcast <8 x float> %c to <4 x i64> @@ -51,10 +46,8 @@ define i32 @testpsz_128_invert1(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) ; CHECK-LABEL: testpsz_128_invert1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vtestps %xmm1, %xmm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: vtestps %xmm0, %xmm1 +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: retq %t0 = bitcast <4 x float> %d to <2 x i64> %t1 = xor <2 x i64> %t0, @@ -69,11 +62,8 @@ define i32 @testpsz_256_invert1(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) ; CHECK-LABEL: testpsz_256_invert1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vtestps %ymm1, %ymm0 -; CHECK-NEXT: cmovnel %esi, %eax +; CHECK-NEXT: vtestps %ymm0, %ymm1 +; CHECK-NEXT: cmovael %esi, %eax ; CHECK-NEXT: vzeroupper ; 
CHECK-NEXT: retq %t0 = bitcast <8 x float> %d to <4 x i64> @@ -93,10 +83,8 @@ define i32 @testpsc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) ; CHECK-LABEL: testpsc_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vtestps %xmm1, %xmm0 -; CHECK-NEXT: cmovael %esi, %eax +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: retq %t0 = bitcast <4 x float> %c to <2 x i64> %t1 = xor <2 x i64> %t0, @@ -111,11 +99,8 @@ define i32 @testpsc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) ; CHECK-LABEL: testpsc_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vtestps %ymm1, %ymm0 -; CHECK-NEXT: cmovael %esi, %eax +; CHECK-NEXT: cmovnel %esi, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %t0 = bitcast <8 x float> %c to <4 x i64> @@ -135,8 +120,6 @@ define i32 @testpsnzc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b ; CHECK-LABEL: testpsnzc_128_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; CHECK-NEXT: vtestps %xmm1, %xmm0 ; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: retq @@ -153,9 +136,6 @@ define i32 @testpsnzc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b ; CHECK-LABEL: testpsnzc_256_invert0: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 -; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vtestps %ymm1, %ymm0 ; CHECK-NEXT: cmovbel %esi, %eax ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll index d2d646248616..fbaf2d0f0914 100644 --- a/llvm/test/CodeGen/X86/rotate_vec.ll +++ b/llvm/test/CodeGen/X86/rotate_vec.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,XOP +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK,AVX512 define <4 x i32> @rot_v4i32_splat(<4 x i32> %x) { @@ -77,10 +78,20 @@ define <4 x i32> @rot_v4i32_non_splat_2masks(<4 x i32> %x) { } define <4 x i32> @rot_v4i32_zero_non_splat(<4 x i32> %x) { -; CHECK-LABEL: rot_v4i32_zero_non_splat: -; CHECK: # %bb.0: -; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 -; CHECK-NEXT: retq +; XOPAVX1-LABEL: rot_v4i32_zero_non_splat: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: rot_v4i32_zero_non_splat: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vbroadcastss %xmm0, %xmm0 +; XOPAVX2-NEXT: retq +; +; AVX512-LABEL: rot_v4i32_zero_non_splat: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 +; AVX512-NEXT: retq %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %2 @@ -97,12 +108,19 @@ define <4 x i32> @rot_v4i32_allsignbits(<4 x i32> %x, <4 x i32> %y) { } define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) { -; 
XOP-LABEL: rot_v4i32_mask_ashr0: -; XOP: # %bb.0: -; XOP-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: vprotd $1, %xmm0, %xmm0 -; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: rot_v4i32_mask_ashr0: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: rot_v4i32_mask_ashr0: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX2-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: rot_v4i32_mask_ashr0: ; AVX512: # %bb.0: @@ -118,13 +136,21 @@ define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) { } define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) { -; XOP-LABEL: rot_v4i32_mask_ashr1: -; XOP: # %bb.0: -; XOP-NEXT: vpsrad $25, %xmm0, %xmm0 -; XOP-NEXT: vprotd $1, %xmm0, %xmm0 -; XOP-NEXT: vpbroadcastd %xmm0, %xmm0 -; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: retq +; XOPAVX1-LABEL: rot_v4i32_mask_ashr1: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0 +; XOPAVX1-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: rot_v4i32_mask_ashr1: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpsrad $25, %xmm0, %xmm0 +; XOPAVX2-NEXT: vprotd $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0 +; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: rot_v4i32_mask_ashr1: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll b/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll new file mode 100644 index 000000000000..7ddc0f1b2e4a --- /dev/null +++ b/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +; Verify that we don't crash when compiling this. We used to hit an +; assert like this +; +; llc: ../include/llvm/CodeGen/ValueTypes.h:251: llvm::MVT llvm::EVT::getSimpleVT() const: Assertion `isSimple() && "Expected a SimpleValueType!"' failed. +; +; due to getFauxShuffleMask not checking that the VT was simple before a call +; to getSimpleValueType(). 
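+;
+; A hedged sketch of the kind of guard that avoids the assert (assuming the
+; shape of getFauxShuffleMask; names taken from the assert message above):
+;   // bail out before calling getSimpleVT() on a non-simple EVT
+;   if (!VT.isSimple())
+;     return false;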
+ +define i1 @dont_hit_assert(i24 signext %d) { +; CHECK-LABEL: dont_hit_assert: +; CHECK: # %bb.0: # %for.cond +; CHECK-NEXT: movb $-1, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: sete %al +; CHECK-NEXT: retq +for.cond: + %t0 = insertelement <8 x i24> zeroinitializer, i24 1, i32 0 + %t5 = icmp slt <8 x i24> %t0, zeroinitializer + %t7 = icmp slt i24 0, %d + %rdx.shuf = shufflevector <8 x i1> %t5, <8 x i1> undef, <8 x i32> + %bin.rdx = and <8 x i1> %t5, %rdx.shuf + %rdx.shuf22 = shufflevector <8 x i1> %bin.rdx, <8 x i1> undef, <8 x i32> + %bin.rdx23 = and <8 x i1> %bin.rdx, %rdx.shuf22 + %rdx.shuf24 = shufflevector <8 x i1> %bin.rdx23, <8 x i1> undef, <8 x i32> + %bin.rdx25 = and <8 x i1> %bin.rdx23, %rdx.shuf24 + %t8 = extractelement <8 x i1> %bin.rdx25, i32 0 + ret i1 %t8 +} diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 7be19c07da80..4483de105385 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -9,17 +9,30 @@ define float @sqrt_ieee(float %f) #0 { ; CHECK: liveins: $xmm0 ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: %1:fr32 = nofpexcept VSQRTSSr killed [[DEF]], [[COPY]], implicit $mxcsr + ; CHECK: $xmm0 = COPY %1 + ; CHECK: RET 0, $xmm0 + %call = tail call float @llvm.sqrt.f32(float %f) + ret float %call +} + +define float @sqrt_ieee_ninf(float %f) #0 { + ; CHECK-LABEL: name: sqrt_ieee_ninf + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] - ; CHECK: %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr + ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) - ; CHECK: %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) - ; CHECK: %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr - ; CHECK: %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr - ; CHECK: %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr - ; CHECK: %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr - ; CHECK: %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr - ; CHECK: %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr + ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr + ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr + ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12 ; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]] ; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from 
constant-pool) @@ -31,7 +44,7 @@ define float @sqrt_ieee(float %f) #0 { ; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]] ; CHECK: $xmm0 = COPY [[COPY5]] ; CHECK: RET 0, $xmm0 - %call = tail call float @llvm.sqrt.f32(float %f) + %call = tail call ninf float @llvm.sqrt.f32(float %f) ret float %call } @@ -41,17 +54,30 @@ define float @sqrt_daz(float %f) #1 { ; CHECK: liveins: $xmm0 ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: %1:fr32 = nofpexcept VSQRTSSr killed [[DEF]], [[COPY]], implicit $mxcsr + ; CHECK: $xmm0 = COPY %1 + ; CHECK: RET 0, $xmm0 + %call = tail call float @llvm.sqrt.f32(float %f) + ret float %call +} + +define float @sqrt_daz_ninf(float %f) #1 { + ; CHECK-LABEL: name: sqrt_daz_ninf + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] - ; CHECK: %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr + ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) - ; CHECK: %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) - ; CHECK: %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr - ; CHECK: %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr - ; CHECK: %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr - ; CHECK: %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr - ; CHECK: %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr - ; CHECK: %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr + ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr + ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr + ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12 ; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS ; CHECK: %15:fr32 = nofpexcept VCMPSSrr [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr @@ -60,7 +86,7 @@ define float @sqrt_daz(float %f) #1 { ; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]] ; CHECK: $xmm0 = COPY [[COPY3]] ; CHECK: RET 0, $xmm0 - %call = tail call float @llvm.sqrt.f32(float %f) + %call = tail call ninf float @llvm.sqrt.f32(float %f) ret float %call } diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll index f10199ce958f..b2593bc43578 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -59,6 +59,20 @@ define float @finite_f32_no_estimate(float %f) #0 { define float @finite_f32_estimate_ieee(float %f) #1 { ; SSE-LABEL: finite_f32_estimate_ieee: ; SSE: # %bb.0: +; SSE-NEXT: sqrtss %xmm0, %xmm0 +; SSE-NEXT: retq +; +; 
AVX-LABEL: finite_f32_estimate_ieee: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + %call = tail call float @__sqrtf_finite(float %f) #2 + ret float %call +} + +define float @finite_f32_estimate_ieee_ninf(float %f) #1 { +; SSE-LABEL: finite_f32_estimate_ieee_ninf: +; SSE: # %bb.0: ; SSE-NEXT: rsqrtss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: mulss %xmm1, %xmm2 @@ -72,7 +86,7 @@ define float @finite_f32_estimate_ieee(float %f) #1 { ; SSE-NEXT: andnps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: finite_f32_estimate_ieee: +; AVX1-LABEL: finite_f32_estimate_ieee_ninf: ; AVX1: # %bb.0: ; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -85,7 +99,7 @@ define float @finite_f32_estimate_ieee(float %f) #1 { ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: finite_f32_estimate_ieee: +; AVX512-LABEL: finite_f32_estimate_ieee_ninf: ; AVX512: # %bb.0: ; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -99,13 +113,27 @@ define float @finite_f32_estimate_ieee(float %f) #1 { ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq - %call = tail call float @__sqrtf_finite(float %f) #2 + %call = tail call ninf float @__sqrtf_finite(float %f) #2 ret float %call } define float @finite_f32_estimate_daz(float %f) #4 { ; SSE-LABEL: finite_f32_estimate_daz: ; SSE: # %bb.0: +; SSE-NEXT: sqrtss %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: finite_f32_estimate_daz: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + %call = tail call float @__sqrtf_finite(float %f) #2 + ret float %call +} + +define float @finite_f32_estimate_daz_ninf(float %f) #4 { +; SSE-LABEL: finite_f32_estimate_daz_ninf: +; SSE: # %bb.0: ; SSE-NEXT: rsqrtss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: mulss %xmm1, %xmm2 @@ -119,7 +147,7 @@ define float @finite_f32_estimate_daz(float %f) #4 { ; SSE-NEXT: andnps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: finite_f32_estimate_daz: +; AVX1-LABEL: finite_f32_estimate_daz_ninf: ; AVX1: # %bb.0: ; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -132,7 +160,7 @@ define float @finite_f32_estimate_daz(float %f) #4 { ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: finite_f32_estimate_daz: +; AVX512-LABEL: finite_f32_estimate_daz_ninf: ; AVX512: # %bb.0: ; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -144,7 +172,7 @@ define float @finite_f32_estimate_daz(float %f) #4 { ; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq - %call = tail call float @__sqrtf_finite(float %f) #2 + %call = tail call ninf float @__sqrtf_finite(float %f) #2 ret float %call } @@ -175,6 +203,20 @@ define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 { define float @sqrtf_check_denorms(float %x) #3 { ; SSE-LABEL: sqrtf_check_denorms: ; SSE: # %bb.0: +; SSE-NEXT: sqrtss %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: sqrtf_check_denorms: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + %call = tail call float @__sqrtf_finite(float %x) #2 + ret float %call +} + +define float @sqrtf_check_denorms_ninf(float %x) #3 { +; SSE-LABEL: sqrtf_check_denorms_ninf: +; SSE: # %bb.0: ; SSE-NEXT: rsqrtss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: mulss %xmm1, %xmm2 @@ -188,7 +230,7 @@ define float 
@sqrtf_check_denorms(float %x) #3 { ; SSE-NEXT: andnps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: sqrtf_check_denorms: +; AVX1-LABEL: sqrtf_check_denorms_ninf: ; AVX1: # %bb.0: ; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -201,7 +243,7 @@ define float @sqrtf_check_denorms(float %x) #3 { ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: sqrtf_check_denorms: +; AVX512-LABEL: sqrtf_check_denorms_ninf: ; AVX512: # %bb.0: ; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -215,13 +257,27 @@ define float @sqrtf_check_denorms(float %x) #3 { ; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} ; AVX512-NEXT: vmovaps %xmm1, %xmm0 ; AVX512-NEXT: retq - %call = tail call float @__sqrtf_finite(float %x) #2 + %call = tail call ninf float @__sqrtf_finite(float %x) #2 ret float %call } define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 { ; SSE-LABEL: sqrt_v4f32_check_denorms: ; SSE: # %bb.0: +; SSE-NEXT: sqrtps %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: sqrt_v4f32_check_denorms: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtps %xmm0, %xmm0 +; AVX-NEXT: retq + %call = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2 + ret <4 x float> %call +} + +define <4 x float> @sqrt_v4f32_check_denorms_ninf(<4 x float> %x) #3 { +; SSE-LABEL: sqrt_v4f32_check_denorms_ninf: +; SSE: # %bb.0: ; SSE-NEXT: rsqrtps %xmm0, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: mulps %xmm2, %xmm1 @@ -237,7 +293,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 { ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: sqrt_v4f32_check_denorms: +; AVX1-LABEL: sqrt_v4f32_check_denorms_ninf: ; AVX1: # %bb.0: ; AVX1-NEXT: vrsqrtps %xmm0, %xmm1 ; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm2 @@ -251,7 +307,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 { ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: sqrt_v4f32_check_denorms: +; AVX512-LABEL: sqrt_v4f32_check_denorms_ninf: ; AVX512: # %bb.0: ; AVX512-NEXT: vrsqrtps %xmm0, %xmm1 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm2 @@ -266,7 +322,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 { ; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0 ; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq - %call = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2 + %call = tail call ninf <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2 ret <4 x float> %call } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index 8c12c0d2e9fc..7db96f42b1cb 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -6914,6 +6914,102 @@ define <16 x i16> @shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_2 ret <16 x i16> %4 } +define <16 x i16> @shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30(<8 x i32> %a0, <8 x i32> %a1) { +; AVX1-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrad $25, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpsrad $25, %xmm0, %xmm0 +; AVX1-NEXT: vpsrad $25, %xmm1, %xmm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpsrad $25, %xmm1, %xmm1 +; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: retq +; +; AVX2OR512VL-LABEL: 
shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpsrad $25, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: vpsrad $25, %ymm1, %ymm1 +; AVX2OR512VL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: retq +; +; XOPAVX1-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm2 +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsrad $25, %xmm1, %xmm3 +; XOPAVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; XOPAVX1-NEXT: vpsrad $25, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: shuffle_v16i16_ashr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpsrad $25, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpsrad $25, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: retq + %1 = ashr <8 x i32> %a0, + %2 = ashr <8 x i32> %a1, + %3 = bitcast <8 x i32> %1 to <16 x i16> + %4 = bitcast <8 x i32> %2 to <16 x i16> + %5 = shufflevector <16 x i16> %3, <16 x i16> %4, <16 x i32> + ret <16 x i16> %5 +} + +define <16 x i16> @shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30(<8 x i32> %a0, <8 x i32> %a1) { +; AVX1-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $25, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpsrld $25, %xmm0, %xmm0 +; AVX1-NEXT: vpsrld $25, %xmm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpsrld $25, %xmm1, %xmm1 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: retq +; +; AVX2OR512VL-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpsrld $25, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: vpsrld $25, %ymm1, %ymm1 +; AVX2OR512VL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: retq +; +; XOPAVX1-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; XOPAVX1: # %bb.0: +; XOPAVX1-NEXT: vpsrld $25, %xmm0, %xmm2 +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; XOPAVX1-NEXT: vpsrld $25, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsrld $25, %xmm1, %xmm3 +; XOPAVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; XOPAVX1-NEXT: vpsrld $25, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; XOPAVX1-NEXT: retq +; +; XOPAVX2-LABEL: shuffle_v16i16_lshr_00_02_04_06_16_18_20_22_08_10_12_14_24_26_28_30: +; XOPAVX2: # %bb.0: +; XOPAVX2-NEXT: vpsrld $25, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpsrld $25, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; XOPAVX2-NEXT: retq + %1 = lshr <8 x i32> %a0, + %2 = lshr <8 x i32> %a1, + %3 = bitcast <8 x i32> %1 to <16 x i16> + %4 = bitcast <8 x i32> %2 to <16 x i16> + %5 = shufflevector <16 x i16> %3, <16 x i16> %4, <16 x i32> + ret <16 x i16> %5 +} + define <16 x i16> @shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13(<16 x i16> %a) { ; AVX1-LABEL: shuffle_v16i16_04_06_07_uu_uu_06_07_05_12_14_15_uu_uu_14_15_13: ; AVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index 
0e79116884f4..2601c7d4172d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -216,6 +216,58 @@ define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a ret <32 x i16> %shuffle } +define <32 x i16> @shuffle_v32i16_ashr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62(<16 x i32> %a0, <16 x i32> %a1) nounwind { +; KNL-LABEL: shuffle_v32i16_ashr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62: +; KNL: ## %bb.0: +; KNL-NEXT: vpsrad $25, %zmm0, %zmm0 +; KNL-NEXT: vpsrad $25, %zmm1, %zmm1 +; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; KNL-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 +; KNL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: shuffle_v32i16_ashr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62: +; SKX: ## %bb.0: +; SKX-NEXT: vpsrad $25, %zmm0, %zmm0 +; SKX-NEXT: vpsrad $25, %zmm1, %zmm1 +; SKX-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %1 = ashr <16 x i32> %a0, + %2 = ashr <16 x i32> %a1, + %3 = bitcast <16 x i32> %1 to <32 x i16> + %4 = bitcast <16 x i32> %2 to <32 x i16> + %5 = shufflevector <32 x i16> %3, <32 x i16> %4, <32 x i32> + ret <32 x i16> %5 +} + +define <32 x i16> @shuffle_v32i16_lshr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62(<16 x i32> %a0, <16 x i32> %a1) nounwind { +; KNL-LABEL: shuffle_v32i16_lshr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62: +; KNL: ## %bb.0: +; KNL-NEXT: vpsrld $25, %zmm0, %zmm0 +; KNL-NEXT: vpsrld $25, %zmm1, %zmm1 +; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; KNL-NEXT: vpackusdw %ymm3, %ymm2, %ymm2 +; KNL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: shuffle_v32i16_lshr_00_02_04_06_32_34_36_38_08_10_12_14_40_42_44_46_16_18_20_22_48_50_52_54_24_26_28_30_56_58_60_62: +; SKX: ## %bb.0: +; SKX-NEXT: vpsrld $25, %zmm0, %zmm0 +; SKX-NEXT: vpsrld $25, %zmm1, %zmm1 +; SKX-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %1 = lshr <16 x i32> %a0, + %2 = lshr <16 x i32> %a1, + %3 = bitcast <16 x i32> %1 to <32 x i16> + %4 = bitcast <16 x i32> %2 to <32 x i16> + %5 = shufflevector <32 x i16> %3, <32 x i16> %4, <32 x i32> + ret <32 x i16> %5 +} + define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) { ; KNL-LABEL: insert_dup_mem_v32i16_i32: ; KNL: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll index 3afb54a9d3bb..3c95f4ce400e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -546,6 +546,94 @@ define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_ ret <64 x i8> %shuffle } +define <64 x i8> @shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind { +; AVX512F-LABEL: 
shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrad $25, %zmm0, %zmm0 +; AVX512F-NEXT: vpsrad $25, %zmm1, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512F-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrad $25, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrad $25, %zmm1, %zmm1 +; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vpsrad $25, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrad $25, %zmm1, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512DQ-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512VBMI: # %bb.0: +; AVX512VBMI-NEXT: vpsrad $25, %zmm0, %zmm0 +; AVX512VBMI-NEXT: vpsrad $25, %zmm1, %zmm1 +; AVX512VBMI-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 +; AVX512VBMI-NEXT: retq + %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %3 = bitcast <16 x i32> %1 to <64 x i8> + %4 = bitcast <16 x i32> %2 to <64 x i8> + %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 64, i32 65, i32 68, i32 69, i32 72, i32 73, i32 76, i32 77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32 80, i32 81, i32 84, i32 85, i32 88, i32 89, i32 92, i32 93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32 96, i32 97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125> + ret <64 x i8> %5 +} + +define <64 x i8> @shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind { +; AVX512F-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrld $25, %zmm0, %zmm0 +; AVX512F-NEXT: vpsrld $25, %zmm1, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512F-NEXT: vpackusdw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrld $25, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrld
$25, %zmm1, %zmm1 +; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vpsrld $25, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrld $25, %zmm1, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512DQ-NEXT: vpackusdw %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: +; AVX512VBMI: # %bb.0: +; AVX512VBMI-NEXT: vpsrld $25, %zmm0, %zmm0 +; AVX512VBMI-NEXT: vpsrld $25, %zmm1, %zmm1 +; AVX512VBMI-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 +; AVX512VBMI-NEXT: retq + %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %3 = bitcast <16 x i32> %1 to <64 x i8> + %4 = bitcast <16 x i32> %2 to <64 x i8> + %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 64, i32 65, i32 68, i32 69, i32 72, i32 73, i32 76, i32 77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32 80, i32 81, i32 84, i32 85, i32 88, i32 89, i32 92, i32 93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32 96, i32 97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125> + ret <64 x i8> %5 +} + define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) { ; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index 08923cab6ebb..17781eb922a6 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -159,6 +159,36 @@ define <32 x i16> @combine_pshufb_as_pshufhw(<32 x i16> %a0) { ret <32 x i16> %1 } +define <32 x i16> @combine_vpermi2var_as_packssdw(<16 x i32> %a0, <16 x i32> %a1) nounwind { +; CHECK-LABEL: combine_vpermi2var_as_packssdw: +; CHECK: # %bb.0: +; CHECK-NEXT: vpsrad $25, %zmm0, %zmm0 +; CHECK-NEXT: vpsrad $25, %zmm1, %zmm1 +; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %3 = bitcast <16 x i32> %1 to <32 x i16> + %4 = bitcast <16 x i32> %2 to <32 x i16> + %5 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %3, <32 x i16> <i16 0, i16 2, i16 4, i16 6, i16 32, i16 34, i16 36, i16 38, i16 8, i16 10, i16 12, i16 14, i16 40, i16 42, i16 44, i16 46, i16 16, i16 18, i16 20, i16 22, i16 48, i16 50, i16 52, i16 54, i16 24, i16 26, i16 28, i16 30, i16 56, i16 58, i16 60, i16 62>, <32 x i16> %4, i32 -1) + ret <32 x i16> %5 +} + +define <32 x i16> @combine_vpermi2var_as_packusdw(<16 x i32> %a0, <16 x i32> %a1) nounwind { +; CHECK-LABEL: combine_vpermi2var_as_packusdw: +; CHECK: # %bb.0: +; CHECK-NEXT: vpsrld $25, %zmm0, %zmm0 +; CHECK-NEXT: vpsrld $25, %zmm1, %zmm1 +; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> + %3 = bitcast <16 x i32> %1 to <32 x i16> + %4 = bitcast <16 x i32> %2 to <32 x i16> + %5 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %3, <32 x i16> <i16 0, i16 2, i16 4, i16 6, i16 32, i16 34, i16 36, i16 38, i16 8, i16 10, i16 12, i16 14, i16 40, i16 42, i16 44, i16 46, i16 16, i16 18, i16 20, i16 22, i16 48, i16 50, i16 52, i16 54, i16 24, i16 26, i16 28, i16 30, i16 56, i16 58, i16 60, i16 62>, <32 x i16> %4, i32 -1) + ret <32 x i16> %5 +} + define <64 x i8>
@combine_pshufb_as_packsswb(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; CHECK-LABEL: combine_pshufb_as_packsswb: ; CHECK: # %bb.0: diff --git a/llvm/test/ExecutionEngine/OrcLazy/Inputs/bar-return-i32-call-foo.ll b/llvm/test/ExecutionEngine/OrcLazy/Inputs/bar-return-i32-call-foo.ll new file mode 100644 index 000000000000..5a36041640ce --- /dev/null +++ b/llvm/test/ExecutionEngine/OrcLazy/Inputs/bar-return-i32-call-foo.ll @@ -0,0 +1,8 @@ +declare i32 @foo() + +define i32 @bar() { +entry: + %0 = call i32 @foo() + ret i32 %0 +} + diff --git a/llvm/test/ExecutionEngine/OrcLazy/Inputs/basic-object-source.ll b/llvm/test/ExecutionEngine/OrcLazy/Inputs/foo-return-i32-0.ll similarity index 100% rename from llvm/test/ExecutionEngine/OrcLazy/Inputs/basic-object-source.ll rename to llvm/test/ExecutionEngine/OrcLazy/Inputs/foo-return-i32-0.ll diff --git a/llvm/test/ExecutionEngine/OrcLazy/basic-object-file-loading.ll b/llvm/test/ExecutionEngine/OrcLazy/basic-object-file-loading.ll index 0d815782b1cb..9dc74d5241bb 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/basic-object-file-loading.ll +++ b/llvm/test/ExecutionEngine/OrcLazy/basic-object-file-loading.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj -o %t %p/Inputs/basic-object-source.ll +; RUN: llc -filetype=obj -o %t %p/Inputs/foo-return-i32-0.ll ; RUN: lli -jit-kind=orc-lazy -extra-object %t %s ; ; Check that we can load an object file and call a function in it. diff --git a/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll b/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll index 00b54fbf73fd..67d392e71456 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll +++ b/llvm/test/ExecutionEngine/OrcLazy/global-ctors-and-dtors.ll @@ -1,6 +1,12 @@ -; RUN: lli -jit-kind=orc-lazy -orc-lazy-debug=funcs-to-stdout %s | FileCheck %s +; Test that global constructors and destructors are run: ; -; Test that global constructors and destructors are run. +; RUN: lli -jit-kind=orc-lazy -orc-lazy-debug=funcs-to-stdout -extra-module %s \ +; RUN: %S/Inputs/noop-main.ll | FileCheck %s +; +; Test that this is true for global constructors and destructors in other +; JITDylibs. 
+; RUN: lli -jit-kind=orc-lazy -orc-lazy-debug=funcs-to-stdout \ +; RUN: -jd extra -extra-module %s -jd main %S/Inputs/noop-main.ll | FileCheck %s ; ; CHECK: Hello ; CHECK: [ {{.*}}main{{.*}} ] @@ -22,11 +28,6 @@ entry: declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) -define i32 @main(i32 %argc, i8** nocapture readnone %argv) { -entry: - ret i32 0 -} - define internal void @_GLOBAL__sub_I_hello.cpp() { entry: %puts.i.i.i = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str, i64 0, i64 0)) diff --git a/llvm/test/ExecutionEngine/OrcLazy/static-library-support.ll b/llvm/test/ExecutionEngine/OrcLazy/static-library-support.ll index 304160c7f787..a13441187aa8 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/static-library-support.ll +++ b/llvm/test/ExecutionEngine/OrcLazy/static-library-support.ll @@ -1,11 +1,13 @@ ; This first line will generate the .o files for the next run line -; RUN: llc -filetype=obj -o %t.o %p/Inputs/basic-object-source.ll -; RUN: llvm-ar r %t.a %t.o -; RUN: lli -jit-kind=orc-lazy -extra-archive %t.a %s +; RUN: rm -rf %t && mkdir -p %t +; RUN: llc -filetype=obj -o %t/foo.o %p/Inputs/foo-return-i32-0.ll +; RUN: llc -filetype=obj -o %t/bar.o %p/Inputs/bar-return-i32-call-foo.ll +; RUN: llvm-ar r %t/staticlib.a %t/foo.o %t/bar.o +; RUN: lli -jit-kind=orc-lazy -extra-archive %t/staticlib.a %s -declare i32 @foo() +declare i32 @bar() define i32 @main() { - %r = call i32 @foo( ) ; [#uses=1] + %r = call i32 @bar() ; [#uses=1] ret i32 %r } diff --git a/llvm/test/FileCheck/numeric-defines-diagnostics.txt b/llvm/test/FileCheck/numeric-defines-diagnostics.txt index da8c9cc8884e..ddced3721f1d 100644 --- a/llvm/test/FileCheck/numeric-defines-diagnostics.txt +++ b/llvm/test/FileCheck/numeric-defines-diagnostics.txt @@ -28,6 +28,7 @@ NUMERRCLITRAIL-NEXT:Global define #1: #VALUE+2=10 (parsed as: {{\[\[#VALUE\+2:10 NUMERRCLITRAIL-NEXT: ^ ; Invalid format for variable. +RUN: %ProtectFileCheckOutput \ RUN: not FileCheck -D#,VALUE=10 --input-file %s %s 2>&1 \ RUN: | FileCheck %s --strict-whitespace --match-full-lines --check-prefix NUMERRCLIFMT diff --git a/llvm/test/FileCheck/numeric-expression.txt b/llvm/test/FileCheck/numeric-expression.txt index 14e378281793..8396b6e2de11 100644 --- a/llvm/test/FileCheck/numeric-expression.txt +++ b/llvm/test/FileCheck/numeric-expression.txt @@ -41,8 +41,10 @@ CHECK-LABEL: DEF FMT SPC CHECK-NEXT: [[# %x , VAR2a : ]] ; Numeric variable definition with unsupported matching format. +RUN: %ProtectFileCheckOutput \ RUN: not FileCheck --check-prefixes ERR,INVALID-FMT-SPEC1 --input-file %s %s 2>&1 \ RUN: | FileCheck --check-prefix INVALID-FMT-SPEC-MSG1 --strict-whitespace %s +RUN: %ProtectFileCheckOutput \ RUN: not FileCheck --check-prefixes ERR,INVALID-FMT-SPEC2 --input-file %s %s 2>&1 \ RUN: | FileCheck --check-prefix INVALID-FMT-SPEC-MSG2 --strict-whitespace %s @@ -182,6 +184,7 @@ CHECK-NEXT: [[# %u, VAR2]] CHECK-NEXT: [[# %u, VAR3]] ; Conflicting implicit format. +RUN: %ProtectFileCheckOutput \ RUN: not FileCheck --check-prefixes CHECK,FMT-CONFLICT --input-file %s %s 2>&1 \ RUN: | FileCheck --strict-whitespace --check-prefix FMT-CONFLICT-MSG %s @@ -359,6 +362,7 @@ SAME-LINE-USE-MSG2-NEXT: {{S}}AME-LINE-USE2-NEXT: {{\[\[#VAR2:VAR1\+1\]\] \[\[#V SAME-LINE-USE-MSG2-NEXT: {{^}} ^{{$}} ; Invalid change of format in variable redefinition. 
+RUN: %ProtectFileCheckOutput \ RUN: not FileCheck --check-prefix REDEF-NEW-FMT --input-file %s %s 2>&1 \ RUN: | FileCheck --strict-whitespace --check-prefix REDEF-NEW-FMT-MSG %s diff --git a/llvm/test/MC/PowerPC/ppc64-prefix-align.s b/llvm/test/MC/PowerPC/ppc64-prefix-align.s index 80d2f0722a76..29594e9e33ae 100644 --- a/llvm/test/MC/PowerPC/ppc64-prefix-align.s +++ b/llvm/test/MC/PowerPC/ppc64-prefix-align.s @@ -13,10 +13,10 @@ beq 0, LAB1 # 4 beq 1, LAB2 # 8 -# CHECK-BE: 0: 41 82 00 c0 bt 2, .+192 -# CHECK-BE-NEXT: 4: 41 86 00 f8 bt 6, .+248 -# CHECK-LE: 0: c0 00 82 41 bt 2, .+192 -# CHECK-LE-NEXT: 4: f8 00 86 41 bt 6, .+248 +# CHECK-BE: 0: 41 82 00 c0 bt 2, 0xc0 +# CHECK-BE-NEXT: 4: 41 86 00 f8 bt 6, 0xfc +# CHECK-LE: 0: c0 00 82 41 bt 2, 0xc0 +# CHECK-LE-NEXT: 4: f8 00 86 41 bt 6, 0xfc paddi 1, 2, 8589934576, 0 # 16 paddi 1, 2, 8589934576, 0 # 24 paddi 1, 2, 8589934576, 0 # 32 diff --git a/llvm/test/MC/RISCV/attribute-with-option.s b/llvm/test/MC/RISCV/attribute-with-option.s new file mode 100644 index 000000000000..749717cb7a55 --- /dev/null +++ b/llvm/test/MC/RISCV/attribute-with-option.s @@ -0,0 +1,21 @@ +## When a user specifies an architecture extension which conflicts with an +## architecture attribute, we use the architecture attribute instead of the +## command line option. +## +## This test uses option '-mattr=+e' to specify the "e" extension. However, +## there is an architecture attribute in the file to specify rv32i. We will +## use rv32i to assemble the file instead of rv32e. + +# RUN: llvm-mc %s -triple=riscv32 -mattr=+e -filetype=obj -o - \ +# RUN: | llvm-readobj -A - | FileCheck %s + +.attribute arch, "rv32i2p0" +## This operand would be invalid for RV32E, because x16 is not a valid RV32E +## register. Since the file is assembled as RV32I, no assembly error is triggered. +lui x16, 1 + +## Check that the architecture attribute is not overridden by the command line +## option. +# CHECK: Tag: 5 +# CHECK-NEXT: TagName: arch +# CHECK-NEXT: Value: rv32i2p0 diff --git a/llvm/test/MC/X86/align-via-padding-corner.s b/llvm/test/MC/X86/align-via-padding-corner.s new file mode 100644 index 000000000000..cc13ff7eed7f --- /dev/null +++ b/llvm/test/MC/X86/align-via-padding-corner.s @@ -0,0 +1,29 @@ + # RUN: llvm-mc -mcpu=skylake -filetype=obj -triple x86_64-pc-linux-gnu %s -x86-pad-max-prefix-size=5 | llvm-objdump -d - | FileCheck %s + + + # The first test checks the correctness corner case - we can't add padding + # prefixes to an instruction that follows an explicit prefix. + .globl labeled_prefix_test +labeled_prefix_test: +# CHECK: 0: 2e 2e 2e 2e 2e e9 06 00 00 00 jmp +# CHECK: a: 3e e9 00 00 00 00 jmp + jmp bar + DS + jmp bar + .p2align 4 +bar: + ret + + # The second test is similar to the first - we can't add padding prefixes + # to an instruction that follows hard-coded bytes.
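 # (Here the hard-coded 0x3e byte emitted via .byte plays the role of the + # prefix, so no padding prefixes may be attached to the jmp that follows it.)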
+ .p2align 5 + .globl labeled_hardcode_test +labeled_hardcode_test: +# CHECK: 20: 2e 2e 2e 2e 2e e9 06 00 00 00 jmp +# CHECK: 2a: 3e e9 00 00 00 00 jmp + jmp baz + .byte 0x3e + jmp baz + .p2align 4 +baz: + ret diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 8dc9b945d589..369a661a89cf 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -76,6 +76,26 @@ define void @test3a(i8* %p) { ret void } +declare noalias i8* @aligned_alloc(i64, i64) + +define void @test3b(i8* %p) { + %1 = tail call noalias i8* @aligned_alloc(i64 32, i64 128) + ; CHECK: %1 = alloca i8, i64 128, align 32 + ; CHECK-NEXT: tail call void @nofree_arg_only + tail call void @nofree_arg_only(i8* %1, i8* %p) + ; CHECK-NOT: @free(i8* %1) + tail call void @free(i8* %1) + ret void +} + +; leave alone non-constant alignments. +define void @test3c(i64 %alignment) { + %1 = tail call noalias i8* @aligned_alloc(i64 %alignment, i64 128) + ; CHECK: tail call noalias i8* @aligned_alloc + tail call void @free(i8* %1) + ret void +} + declare noalias i8* @calloc(i64, i64) define void @test0() { @@ -90,7 +110,7 @@ define void @test0() { ret void } -; TEST 4 +; TEST 4 define void @test4() { %1 = tail call noalias i8* @malloc(i64 4) ; CHECK: %1 = alloca i8, i64 4 @@ -219,7 +239,7 @@ define i32 @test_lifetime() { ret i32 %3 } -; TEST 11 +; TEST 11 define void @test11() { %1 = tail call noalias i8* @malloc(i64 4) diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index 1e93e97634b2..ef8cd5234326 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -1276,13 +1276,11 @@ define i32 @ret1or2(i1 %c) { define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) { ; OLD_PM-LABEL: define {{[^@]+}}@callee_range_1 ; OLD_PM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) -; OLD_PM-NEXT: [[F:%.*]] = and i1 true, true -; OLD_PM-NEXT: ret i1 [[F]] +; OLD_PM-NEXT: ret i1 true ; ; NEW_PM-LABEL: define {{[^@]+}}@callee_range_1 ; NEW_PM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) -; NEW_PM-NEXT: [[F:%.*]] = and i1 true, true -; NEW_PM-NEXT: ret i1 [[F]] +; NEW_PM-NEXT: ret i1 true ; ; CGSCC_OLD_PM-LABEL: define {{[^@]+}}@callee_range_1 ; CGSCC_OLD_PM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll index 6315e3bd74da..634c37568ea3 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll @@ -457,7 +457,7 @@ define i1 @test14_slt(i32 %a) { ; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[RESULT:%.*]] = or i1 false, false -; CHECK-NEXT: ret i1 [[RESULT]] +; CHECK-NEXT: ret i1 false ; CHECK: else: ; CHECK-NEXT: ret i1 false ; @@ -508,7 +508,7 @@ define i1 @test14_sgt(i32 %a) { ; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[RESULT:%.*]] = or i1 false, false -; CHECK-NEXT: ret i1 [[RESULT]] +; CHECK-NEXT: ret i1 false ; CHECK: else: ; CHECK-NEXT: ret i1 false ; @@ -585,7 +585,7 @@ define i1 @test14_ugt(i32 %a) { ; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[RESULT:%.*]] = or i1 false, false -; CHECK-NEXT: ret i1 [[RESULT]] +; CHECK-NEXT: ret i1 false ; CHECK: else: ; CHECK-NEXT: ret 
i1 false ; @@ -629,6 +629,31 @@ else: ret i1 false } +define i1 @test14_ugt_and(i32 %a) { +; CHECK-LABEL: @test14_ugt_and( +; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A_OFF]], 8 +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[RESULT:%.*]] = and i1 false, false +; CHECK-NEXT: ret i1 false +; CHECK: else: +; CHECK-NEXT: ret i1 false +; + %a.off = add i32 %a, -8 + %cmp = icmp ugt i32 %a.off, 8 + br i1 %cmp, label %then, label %else + +then: + %dead.1 = icmp eq i32 %a, 8 + %dead.2 = icmp eq i32 %a, 16 + %result = and i1 %dead.1, %dead.2 + ret i1 %result + +else: + ret i1 false +} + @limit = external global i32 define i1 @test15(i32 %a) { ; CHECK-LABEL: @test15( diff --git a/llvm/test/Transforms/GVN/malloc-load-removal.ll b/llvm/test/Transforms/GVN/malloc-load-removal.ll index 1d7a2ddc4c2d..84f4746344ca 100644 --- a/llvm/test/Transforms/GVN/malloc-load-removal.ll +++ b/llvm/test/Transforms/GVN/malloc-load-removal.ll @@ -54,3 +54,28 @@ if.end: ; preds = %if.then, %entry ; CHECK_NO_LIBCALLS: load ; CHECK_NO_LIBCALLS: icmp } + +declare i8* @aligned_alloc(i64, i64) nounwind + +define noalias i8* @test3() nounwind uwtable ssp { +entry: + %call = tail call i8* @aligned_alloc(i64 256, i64 32) nounwind + %0 = load i8, i8* %call, align 32 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK-LABEL: @test3( +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS-LABEL: @test3( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} diff --git a/llvm/test/Transforms/Inline/ret_attr_update.ll b/llvm/test/Transforms/Inline/ret_attr_update.ll deleted file mode 100644 index 2e53540c3fe2..000000000000 --- a/llvm/test/Transforms/Inline/ret_attr_update.ll +++ /dev/null @@ -1,159 +0,0 @@ -; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s -; RUN: opt < %s -passes=always-inline -S | FileCheck %s - -declare i8* @foo(i8*) argmemonly nounwind - -define i8* @callee(i8 *%p) alwaysinline { -; CHECK: @callee( -; CHECK: call i8* @foo(i8* noalias %p) - %r = call i8* @foo(i8* noalias %p) - ret i8* %r -} - -define i8* @caller(i8* %ptr, i64 %x) { -; CHECK-LABEL: @caller -; CHECK: call nonnull i8* @foo(i8* noalias - %gep = getelementptr inbounds i8, i8* %ptr, i64 %x - %p = call nonnull i8* @callee(i8* %gep) - ret i8* %p -} - -declare void @llvm.experimental.guard(i1,...) -; Cannot add nonnull attribute to foo -; because the guard is a throwing call -define internal i8* @callee_with_throwable(i8* %p) alwaysinline { -; CHECK-NOT: callee_with_throwable - %r = call i8* @foo(i8* %p) - %cond = icmp ne i8* %r, null - call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] - ret i8* %r -} - -declare i8* @bar(i8*) readonly nounwind -; Here also we cannot add nonnull attribute to the call bar. 
-define internal i8* @callee_with_explicit_control_flow(i8* %p) alwaysinline { -; CHECK-NOT: callee_with_explicit_control_flow - %r = call i8* @bar(i8* %p) - %cond = icmp ne i8* %r, null - br i1 %cond, label %ret, label %orig - -ret: - ret i8* %r - -orig: - ret i8* %p -} - -define i8* @caller2(i8* %ptr, i64 %x, i1 %cond) { -; CHECK-LABEL: @caller2 -; CHECK: call i8* @foo -; CHECK: call i8* @bar - %gep = getelementptr inbounds i8, i8* %ptr, i64 %x - %p = call nonnull i8* @callee_with_throwable(i8* %gep) - %q = call nonnull i8* @callee_with_explicit_control_flow(i8* %gep) - br i1 %cond, label %pret, label %qret - -pret: - ret i8* %p - -qret: - ret i8* %q -} - -define internal i8* @callee3(i8 *%p) alwaysinline { -; CHECK-NOT: callee3 - %r = call noalias i8* @foo(i8* %p) - ret i8* %r -} - -; add the deref attribute to the existing attributes on foo. -define i8* @caller3(i8* %ptr, i64 %x) { -; CHECK-LABEL: caller3 -; CHECK: call noalias dereferenceable_or_null(12) i8* @foo - %gep = getelementptr inbounds i8, i8* %ptr, i64 %x - %p = call dereferenceable_or_null(12) i8* @callee3(i8* %gep) - ret i8* %p -} - -declare i8* @inf_loop_call(i8*) nounwind -; We cannot propagate attributes to foo because we do not know whether inf_loop_call -; will return execution. -define internal i8* @callee_with_sideeffect_callsite(i8* %p) alwaysinline { -; CHECK-NOT: callee_with_sideeffect_callsite - %r = call i8* @foo(i8* %p) - %v = call i8* @inf_loop_call(i8* %p) - ret i8* %r -} - -; do not add deref attribute to foo -define i8* @test4(i8* %ptr, i64 %x) { -; CHECK-LABEL: test4 -; CHECK: call i8* @foo - %gep = getelementptr inbounds i8, i8* %ptr, i64 %x - %p = call dereferenceable_or_null(12) i8* @callee_with_sideeffect_callsite(i8* %gep) - ret i8* %p -} - -declare i8* @baz(i8*) nounwind readonly -define internal i8* @callee5(i8* %p) alwaysinline { -; CHECK-NOT: callee5 - %r = call i8* @foo(i8* %p) - %v = call i8* @baz(i8* %p) - ret i8* %r -} - -; add the deref attribute to foo. -define i8* @test5(i8* %ptr, i64 %x) { -; CHECK-LABEL: test5 -; CHECK: call dereferenceable_or_null(12) i8* @foo - %gep = getelementptr inbounds i8, i8* %ptr, i64 %x - %s = call dereferenceable_or_null(12) i8* @callee5(i8* %gep) - ret i8* %s -} - -; deref attributes have different values on the callee and the call feeding into -; the return. -; AttrBuilder chooses the already existing value and does not overwrite it. -define internal i8* @callee6(i8* %p) alwaysinline { -; CHECK-NOT: callee6 - %r = call dereferenceable_or_null(16) i8* @foo(i8* %p) - %v = call i8* @baz(i8* %p) - ret i8* %r -} - - -define i8* @test6(i8* %ptr, i64 %x) { -; CHECK-LABEL: test6 -; CHECK: call dereferenceable_or_null(16) i8* @foo - %gep = getelementptr inbounds i8, i8* %ptr, i64 %x - %s = call dereferenceable_or_null(12) i8* @callee6(i8* %gep) - ret i8* %s -} - -; We add the attributes from the callee to both the calls below. 
-define internal i8* @callee7(i8 *%ptr, i1 %cond) alwaysinline { -; CHECK-NOT: @callee7( - br i1 %cond, label %pass, label %fail - -pass: - %r = call i8* @foo(i8* noalias %ptr) - ret i8* %r - -fail: - %s = call i8* @baz(i8* %ptr) - ret i8* %s -} - -define void @test7(i8* %ptr, i64 %x, i1 %cond) { -; CHECK-LABEL: @test7 -; CHECK: call nonnull i8* @foo(i8* noalias -; CHECK: call nonnull i8* @baz -; CHECK: phi i8* -; CHECK: call void @snort - - %gep = getelementptr inbounds i8, i8* %ptr, i64 %x - %t = call nonnull i8* @callee7(i8* %gep, i1 %cond) - call void @snort(i8* %t) - ret void -} -declare void @snort(i8*) diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index 01084312c9f8..897242e4837b 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -159,8 +159,8 @@ define i8 @n8(i8 %x, i1 %y, i8 %z) { ; x - (y - z) -> x - y + z -> x + (z - y) define i8 @t9(i8 %x, i8 %y) { ; CHECK-LABEL: @t9( -; CHECK-NEXT: [[T01:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[T01]] +; CHECK-NEXT: [[T0_NEG:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[T0_NEG]] ; %t0 = sub i8 %y, %x %t1 = sub i8 0, %t0 @@ -375,3 +375,51 @@ define i8 @n21(i8 %x, i16 %y) { %t2 = sub i8 %x, %t1 ret i8 %t2 } + +define i4 @negate_xor(i4 %x) { +; CHECK-LABEL: @negate_xor( +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], -6 +; CHECK-NEXT: [[O_NEG:%.*]] = add i4 [[TMP1]], 1 +; CHECK-NEXT: ret i4 [[O_NEG]] +; + %o = xor i4 %x, 5 + %r = sub i4 0, %o + ret i4 %r +} + +define <2 x i4> @negate_xor_vec(<2 x i4> %x) { +; CHECK-LABEL: @negate_xor_vec( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], +; CHECK-NEXT: ret <2 x i4> [[O_NEG]] +; + %o = xor <2 x i4> %x, + %r = sub <2 x i4> zeroinitializer, %o + ret <2 x i4> %r +} + +define i8 @negate_xor_use(i8 %x) { +; CHECK-LABEL: @negate_xor_use( +; CHECK-NEXT: [[O:%.*]] = xor i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use8(i8 [[O]]) +; CHECK-NEXT: [[R:%.*]] = sub i8 0, [[O]] +; CHECK-NEXT: ret i8 [[R]] +; + %o = xor i8 %x, 5 + call void @use8(i8 %o) + %r = sub i8 0, %o + ret i8 %r +} + +define i4 @negate_shl_xor(i4 %x, i4 %y) { +; CHECK-LABEL: @negate_shl_xor( +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], -6 +; CHECK-NEXT: [[O_NEG:%.*]] = add i4 [[TMP1]], 1 +; CHECK-NEXT: [[S_NEG:%.*]] = shl i4 [[O_NEG]], [[Y:%.*]] +; CHECK-NEXT: ret i4 [[S_NEG]] +; + %o = xor i4 %x, 5 + %s = shl i4 %o, %y + %r = sub i4 0, %s + ret i4 %r +} diff --git a/llvm/test/Transforms/JumpThreading/select-unfold-msan.ll b/llvm/test/Transforms/JumpThreading/select-unfold-msan.ll new file mode 100644 index 000000000000..ea336e0f0f7e --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/select-unfold-msan.ll @@ -0,0 +1,28 @@ +; PR45220 +; RUN: opt -S -jump-threading < %s | FileCheck %s + +declare i1 @NOP() + +define dso_local i32 @f(i1 %b, i1 %u) sanitize_memory { +entry: + br i1 %b, label %if.end, label %if.else + +if.else: + %call = call i1 @NOP() + br label %if.end + +if.end: +; Check that both selects in this BB are still in place, +; and were not replaced with a conditional branch. 
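+; (Unfolding a select into a branch would make control flow depend on the +; select condition, which MSan could then report as a branch on uninitialized +; memory - presumably why the unfolding is skipped under sanitize_memory.)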
+; CHECK: phi +; CHECK-NEXT: phi +; CHECK-NEXT: select +; CHECK-NEXT: select +; CHECK-NEXT: ret + %u1 = phi i1 [ true, %if.else ], [ %u, %entry ] + %v = phi i1 [ %call, %if.else ], [ false, %entry ] + %s = select i1 %u1, i32 22, i32 0 + %v1 = select i1 %v, i32 %s, i32 42 + ret i32 %v1 +} + diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll index 8302ad9562f5..250a51a27b44 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll @@ -111,7 +111,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(i32 addrspace( } ; CHECK-LABEL: @merge_global_store_2_constants_f32_i32 -; CHECK store <2 x float> , <2 x float> addrspace(1)* %{{[0-9]+$}} +; CHECK: store <2 x i32> , <2 x i32> addrspace(1)* define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll index 1cb8d14f1778..3f24f5c9e53d 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vect-ptr-ptr-size-mismatch.ll @@ -55,7 +55,7 @@ entry: } ; CHECK-LABEL: @ext_ptr -; CHECK load <2 x i32> +; CHECK: load <2 x i32> define void @ext_ptr(i32 addrspace(5)* %p) { entry: %gep1 = getelementptr inbounds i32, i32 addrspace(5)* %p, i64 0 @@ -68,7 +68,7 @@ entry: } ; CHECK-LABEL: @shrink_ptr -; CHECK load <2 x i32> +; CHECK: load <2 x i32> define void @shrink_ptr(i32* %p) { entry: %gep1 = getelementptr inbounds i32, i32* %p, i64 0 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll new file mode 100644 index 000000000000..c417ba719412 --- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -lower-matrix-intrinsics -S < %s | FileCheck %s + +; Make sure we correctly lower in the presence of getelementptr constant +; expressions. 
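+; (Note that the pointer operands of the loads and stores below are bitcast +; and getelementptr constant expressions on @foo, not instructions.)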
+ +@foo = global [5 x <4 x double>] zeroinitializer, align 16 + +define void @test(i32 %r, i32 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[R:%.*]], i32* [[R_ADDR]], align 4 +; CHECK-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4 +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* bitcast ([5 x <4 x double>]* @foo to <2 x double>*), align 8 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([5 x <4 x double>], [5 x <4 x double>]* @foo, i32 0, i32 0, i32 2) to <2 x double>*), align 8 +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[BLOCK2:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK2]], [[SPLAT_SPLAT4]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> undef, double [[TMP7]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]] +; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> undef, double [[TMP9]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> +; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> undef, double [[TMP14]], i32 0 +; CHECK-NEXT: 
[[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> undef, double [[TMP16]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]] +; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> +; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> undef, double [[TMP21]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] +; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 +; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> undef, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]] +; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> +; CHECK-NEXT: store <2 x double> [[COL_LOAD]], <2 x double>* bitcast (double* getelementptr inbounds ([5 x <4 x double>], [5 x <4 x double>]* @foo, i64 0, i64 2, i32 0) to <2 x double>*), align 8 +; CHECK-NEXT: store <2 x double> [[COL_LOAD1]], <2 x double>* bitcast (double* getelementptr ([5 x <4 x double>], [5 x <4 x double>]* @foo, i64 0, i64 2, i32 2) to <2 x double>*), align 8 +; CHECK-NEXT: ret void +; +entry: + %r.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + store i32 %r, i32* %r.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %0 = load <4 x double>, <4 x double>* getelementptr inbounds ([5 x <4 x double>], [5 x <4 x double>]* @foo, i64 0, i64 0), align 16 + %mul = call <4 x double> @llvm.matrix.multiply(<4 x double> %0, <4 x double> %0, i32 2, i32 2, i32 2) + store <4 x double> %0, <4 x double>* getelementptr inbounds ([5 x <4 x double>], [5 x <4 x double>]* @foo, i64 0, i64 2), align 16 + ret void +} + +declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32) diff --git a/llvm/test/Transforms/NewGVN/malloc-load-removal.ll b/llvm/test/Transforms/NewGVN/malloc-load-removal.ll index 72f4839a5545..c62bac950df2 100644 --- a/llvm/test/Transforms/NewGVN/malloc-load-removal.ll +++ 
b/llvm/test/Transforms/NewGVN/malloc-load-removal.ll @@ -54,3 +54,28 @@ if.end: ; preds = %if.then, %entry ; CHECK_NO_LIBCALLS: load ; CHECK_NO_LIBCALLS: icmp } + +declare i8* @aligned_alloc(i64, i64) nounwind + +define noalias i8* @test3() nounwind uwtable ssp { +entry: + %call = tail call i8* @aligned_alloc(i64 256, i64 32) nounwind + %0 = load i8, i8* %call, align 32 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK-LABEL: @test3( +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS-LABEL: @test3( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll new file mode 100644 index 000000000000..bda54e25b945 --- /dev/null +++ b/llvm/test/Transforms/SROA/scalable-vectors.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +; This test checks that SROA runs mem2reg on scalable vectors. + +define <vscale x 16 x i1> @alloca_nxv16i1(<vscale x 16 x i1> %pg) { +; CHECK-LABEL: alloca_nxv16i1 +; CHECK-NEXT: ret <vscale x 16 x i1> %pg + %pg.addr = alloca <vscale x 16 x i1> + store <vscale x 16 x i1> %pg, <vscale x 16 x i1>* %pg.addr + %1 = load <vscale x 16 x i1>, <vscale x 16 x i1>* %pg.addr + ret <vscale x 16 x i1> %1 +} + +define <vscale x 16 x i8> @alloca_nxv16i8(<vscale x 16 x i8> %vec) { +; CHECK-LABEL: alloca_nxv16i8 +; CHECK-NEXT: ret <vscale x 16 x i8> %vec + %vec.addr = alloca <vscale x 16 x i8> + store <vscale x 16 x i8> %vec, <vscale x 16 x i8>* %vec.addr + %1 = load <vscale x 16 x i8>, <vscale x 16 x i8>* %vec.addr + ret <vscale x 16 x i8> %1 +} + +; Test scalable alloca that can't be promoted. Mem2Reg only considers +; non-volatile loads and stores for promotion. +define <vscale x 16 x i8> @unpromotable_alloca(<vscale x 16 x i8> %vec) { +; CHECK-LABEL: unpromotable_alloca +; CHECK-NEXT: %vec.addr = alloca <vscale x 16 x i8> +; CHECK-NEXT: store volatile <vscale x 16 x i8> %vec, <vscale x 16 x i8>* %vec.addr +; CHECK-NEXT: %1 = load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %vec.addr +; CHECK-NEXT: ret <vscale x 16 x i8> %1 + %vec.addr = alloca <vscale x 16 x i8> + store volatile <vscale x 16 x i8> %vec, <vscale x 16 x i8>* %vec.addr + %1 = load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %vec.addr + ret <vscale x 16 x i8> %1 +} diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pr45371-find-either-reset.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pr45371-find-either-reset.ll new file mode 100644 index 000000000000..efe426b718eb --- /dev/null +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pr45371-find-either-reset.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -separate-const-offset-from-gep < %s | FileCheck %s + +@e = external global [4000 x i8], align 1 + +define void @find_either_reset() { +; CHECK-LABEL: @find_either_reset( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 65536, undef +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[SUB]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[TMP0]], 96 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i8 0 to i64 +; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i8 [[TMP1]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4000 x i8], [4000 x i8]* @e, i64 [[IDXPROM]], i64 [[IDXPROM1]] +; CHECK-NEXT: ret void +; +entry: + %sub = sub nsw i32 65536, undef + %0 = trunc i32 %sub to i8 + %1 = add i8 %0, -4000 + %arrayidx = getelementptr inbounds [4000 x i8], [4000 x i8]* @e, i8 0, i8 %1 + ret void +} diff --git a/llvm/test/Verifier/matrix-intrinsics.ll b/llvm/test/Verifier/matrix-intrinsics.ll new file mode 100644 index 000000000000..d2f23ab7894e --- /dev/null +++ b/llvm/test/Verifier/matrix-intrinsics.ll @@ -0,0 +1,40 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare <4 x float> @llvm.matrix.transpose.v4f32(<4 x float>, i32, i32) +define <4 x float> @transpose(<4 x
float> %m) { +; CHECK: assembly parsed, but does not verify as correct! +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector + %result.1 = call <4 x float> @llvm.matrix.transpose.v4f32(<4 x float> %m, i32 3, i32 2) + %result.2 = call <4 x float> @llvm.matrix.transpose.v4f32(<4 x float> %result.1, i32 2, i32 1) + ret <4 x float> %result.2 +} + +declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32) +define <4 x float> @multiply(<4 x float> %m) { +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector + %result.1 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> %m, <4 x float> %m, i32 3, i32 2, i32 2) + %result.2 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> %result.1, <4 x float> %m, i32 2, i32 2, i32 1) + ret <4 x float> %result.2 +} + +declare <4 x float> @llvm.matrix.columnwise.load.v4f32.p0v4f32(<4 x float>*, i32, i32, i32) +declare <6 x float> @llvm.matrix.columnwise.load.v6f32.p0v6f32(<6 x float>*, i32, i32, i32) +define <4 x float> @columnwise_load(<4 x float>* %m, <6 x float>* %n) { +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector + %result.1 = call <4 x float> @llvm.matrix.columnwise.load.v4f32.p0v4f32(<4 x float>* %m, i32 2, i32 1, i32 2) + %result.2 = call <6 x float> @llvm.matrix.columnwise.load.v6f32.p0v6f32(<6 x float>* %n, i32 2, i32 3, i32 3) + ret <4 x float> %result.1 +} + +declare void @llvm.matrix.columnwise.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, i32, i32) +declare void @llvm.matrix.columnwise.store.v6f32.p0v6f32(<6 x float>, <6 x float>*, i32, i32, i32) +define void @columnwise_store(<4 x float>* %m, <6 x float>* %n) { +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector + call void @llvm.matrix.columnwise.store.v4f32.p0v4f32(<4 x float> zeroinitializer, <4 x float>* %m, i32 2, i32 1, i32 2) + call void @llvm.matrix.columnwise.store.v6f32.p0v6f32(<6 x float> zeroinitializer, <6 x float>* %n, i32 2, i32 3, i32 3) + ret void +} diff --git a/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-size.ifs b/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-size.ifs index 9afb08802726..30b7cda9b548 100644 --- a/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-size.ifs +++ b/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-size.ifs @@ -1,8 +1,8 @@ # NOTE: Used by weak-mismatch.ifs ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - foobar: { Type: Object, Size: 2 } + - { Name: foobar, Type: Object, Size: 2 } ... diff --git a/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-type.ifs b/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-type.ifs index 8fc550a644cb..3f8d54c7e536 100644 --- a/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-type.ifs +++ b/llvm/test/tools/llvm-ifs/Inputs/strong-mismatch-type.ifs @@ -1,8 +1,8 @@ # NOTE: Used by weak-mismatch.ifs ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - foobar: { Type: Func } + - { Name: foobar, Type: Func } ... 
diff --git a/llvm/test/tools/llvm-ifs/conflict-header-format.ifs b/llvm/test/tools/llvm-ifs/conflict-header-format.ifs index 40ae9c0526f2..4e26fb8080e8 100644 --- a/llvm/test/tools/llvm-ifs/conflict-header-format.ifs +++ b/llvm/test/tools/llvm-ifs/conflict-header-format.ifs @@ -5,10 +5,10 @@ # CHECK-IFS-NEXT: Filenames: # CHECK-IFS-NEXT: ObjectFileFormat Values: TBD ELF ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-apple-unknown ObjectFileFormat: TBD Symbols: - a: { Type: Func } + - { Name: a, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/conflict-header-triple.ifs b/llvm/test/tools/llvm-ifs/conflict-header-triple.ifs index 15bddc6a15f7..9ce04b8b3f31 100644 --- a/llvm/test/tools/llvm-ifs/conflict-header-triple.ifs +++ b/llvm/test/tools/llvm-ifs/conflict-header-triple.ifs @@ -5,10 +5,10 @@ # CHECK-IFS-NEXT: Filenames: # CHECK-IFS-NEXT: Triple Values: mips-unknown-linux x86_64-unknown-linux-gnu ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: mips-unknown-linux ObjectFileFormat: ELF Symbols: - a: { Type: Func } + - { Name: a, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/conflict-header-version.ifs b/llvm/test/tools/llvm-ifs/conflict-header-version.ifs index addf9943441b..ecdeb311f860 100644 --- a/llvm/test/tools/llvm-ifs/conflict-header-version.ifs +++ b/llvm/test/tools/llvm-ifs/conflict-header-version.ifs @@ -5,12 +5,12 @@ # RUN: FileCheck %s --check-prefixes=CHECK-IFS2 # CHECK-IFS: error: Interface Stub: IfsVersion Mismatch. -# CHECK-IFS2: error: Interface Stub: Bad IfsVersion: 0.0, llvm-ifs supported version: 1.2. +# CHECK-IFS2: error: Interface Stub: Bad IfsVersion: 0.0, llvm-ifs supported version: 2.0. ---- !experimental-ifs-v1 +--- !experimental-ifs-v2 IfsVersion: 0.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - a: { Type: Func } + - { Name: a, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/conflict-size.ifs b/llvm/test/tools/llvm-ifs/conflict-size.ifs index 173ce268c741..5e0fcafd55db 100644 --- a/llvm/test/tools/llvm-ifs/conflict-size.ifs +++ b/llvm/test/tools/llvm-ifs/conflict-size.ifs @@ -7,10 +7,10 @@ # CHECK-IFS-NEXT: Filename: # CHECK-IFS-NEXT: Size Values: 1 4 ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - b: { Type: Object, Size: 1 } + - { Name: b, Type: Object, Size: 1 } ... diff --git a/llvm/test/tools/llvm-ifs/conflict-type.ifs b/llvm/test/tools/llvm-ifs/conflict-type.ifs index c518be4e1411..1a10ea79a41c 100644 --- a/llvm/test/tools/llvm-ifs/conflict-type.ifs +++ b/llvm/test/tools/llvm-ifs/conflict-type.ifs @@ -7,10 +7,10 @@ # CHECK-IFS-NEXT: Filename: # CHECK-IFS-NEXT: Type Values: Object Func ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - a: { Type: Object, Size: 1 } + - { Name: a, Type: Object, Size: 1 } ... 
diff --git a/llvm/test/tools/llvm-ifs/conflict-weak.ifs b/llvm/test/tools/llvm-ifs/conflict-weak.ifs index 823b8f1866c3..23eb73d7535f 100644 --- a/llvm/test/tools/llvm-ifs/conflict-weak.ifs +++ b/llvm/test/tools/llvm-ifs/conflict-weak.ifs @@ -2,12 +2,12 @@ # RUN: FileCheck %s --check-prefixes=CHECK-IFS # CHECK-IFS: Symbols: -# CHECK-IFS-NEXT: a: { Type: Func, Weak: true } +# CHECK-IFS-NEXT: - { Name: a, Type: Func, Weak: true } ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - a: { Type: Func, Weak: true } + - { Name: a, Type: Func, Weak: true } ... diff --git a/llvm/test/tools/llvm-ifs/default-empty.ifs b/llvm/test/tools/llvm-ifs/default-empty.ifs index 9848f418f58c..c61f29a37976 100644 --- a/llvm/test/tools/llvm-ifs/default-empty.ifs +++ b/llvm/test/tools/llvm-ifs/default-empty.ifs @@ -1,25 +1,25 @@ # RUN: llvm-ifs -action write-ifs -o - %s | FileCheck --check-prefixes=CHECK-DEFAULT %s # RUN: llvm-ifs -action write-ifs -o - %s %S/weak.ifs | FileCheck --check-prefixes=CHECK-MERGE %s -# CHECK-DEFAULT: --- !experimental-ifs-v1 -# CHECK-DEFAULT-NEXT: IfsVersion: 1.2 +# CHECK-DEFAULT: --- !experimental-ifs-v2 +# CHECK-DEFAULT-NEXT: IfsVersion: 2.0 # CHECK-DEFAULT-NEXT: Triple: '' # CHECK-DEFAULT-NEXT: ObjectFileFormat: ELF -# CHECK-DEFAULT-NEXT: Symbols: {} +# CHECK-DEFAULT-NEXT: Symbols: [] # CHECK-DEFAULT-NEXT: ... -# CHECK-MERGE: --- !experimental-ifs-v1 -# CHECK-MERGE-NEXT: IfsVersion: 1.0 +# CHECK-MERGE: --- !experimental-ifs-v2 +# CHECK-MERGE-NEXT: IfsVersion: 2.0 # CHECK-MERGE-NEXT: Triple: x86_64-unknown-linux-gnu # CHECK-MERGE-NEXT: ObjectFileFormat: ELF # CHECK-MERGE-NEXT: Symbols: -# CHECK-MERGE-DAG: _Z8weakFuncv: { Type: Func, Weak: true } -# CHECK-MERGE-DAG: _Z10strongFuncv: { Type: Func } +# CHECK-MERGE-DAG: - { Name: _Z8weakFuncv, Type: Func, Weak: true } +# CHECK-MERGE-DAG: - { Name: _Z10strongFuncv, Type: Func } # CHECK-MERGE: ... ---- !experimental-ifs-v1 -IfsVersion: 1.2 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: '' ObjectFileFormat: ELF -Symbols: {} +Symbols: [] ... diff --git a/llvm/test/tools/llvm-ifs/empty1.ifs b/llvm/test/tools/llvm-ifs/empty1.ifs new file mode 100644 index 000000000000..d237dd7ea10a --- /dev/null +++ b/llvm/test/tools/llvm-ifs/empty1.ifs @@ -0,0 +1,15 @@ +# RUN: llvm-ifs -action write-ifs -o - %s | FileCheck %s + +# CHECK: --- !experimental-ifs-v2 +# CHECK-NEXT: IfsVersion: 2.0 +# CHECK-NEXT: Triple: x86_64-unknown-linux-gnu +# CHECK-NEXT: ObjectFileFormat: ELF +# CHECK-NEXT: Symbols: [] +# CHECK: ... + +--- !experimental-ifs-v2 +IfsVersion: 2.0 +Triple: x86_64-unknown-linux-gnu +ObjectFileFormat: ELF +Symbols: [] +... diff --git a/llvm/test/tools/llvm-ifs/empty2.ifs b/llvm/test/tools/llvm-ifs/empty2.ifs new file mode 100644 index 000000000000..a294c777bbf9 --- /dev/null +++ b/llvm/test/tools/llvm-ifs/empty2.ifs @@ -0,0 +1,15 @@ +# RUN: llvm-ifs -action write-ifs -o - %s | FileCheck %s + +# CHECK: --- !experimental-ifs-v2 +# CHECK-NEXT: IfsVersion: 2.0 +# CHECK-NEXT: Triple: x86_64-unknown-linux-gnu +# CHECK-NEXT: ObjectFileFormat: ELF +# CHECK-NEXT: Symbols: [] +# CHECK: ... + +--- !experimental-ifs-v2 +IfsVersion: 2.0 +Triple: x86_64-unknown-linux-gnu +ObjectFileFormat: ELF +Symbols: +... 
diff --git a/llvm/test/tools/llvm-ifs/func.ifs b/llvm/test/tools/llvm-ifs/func.ifs index 496e26241922..d6d85782b2e3 100644 --- a/llvm/test/tools/llvm-ifs/func.ifs +++ b/llvm/test/tools/llvm-ifs/func.ifs @@ -10,13 +10,13 @@ # RUN: llvm-ifs -action write-ifs -o - %s %s | \ # RUN: FileCheck %s --check-prefixes=CHECK-MERGE-IFS -# CHECK-IFS: --- !experimental-ifs-v1 -# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS: --- !experimental-ifs-v2 +# CHECK-IFS-NEXT: IfsVersion: 2.0 # CHECK-IFS-NEXT: Triple: x86_64-unknown-linux-gnu # CHECK-IFS-NEXT: ObjectFileFormat: ELF # CHECK-IFS-NEXT: Symbols: -# CHECK-IFS-DAG: a: { Type: Func } -# CHECK-IFS-DAG: b: { Type: Object, Size: 4 } +# CHECK-IFS-DAG: - { Name: a, Type: Func } +# CHECK-IFS-DAG: - { Name: b, Type: Object, Size: 4 } # CHECK-IFS: ... # CHECK-ELF: ELF Header: @@ -39,18 +39,18 @@ # CHECK-DARWIN-TBD3-NEXT: ... # Here we are testing to see if two identical symbols will merge. -# CHECK-MERGE-IFS: --- !experimental-ifs-v1 -# CHECK-MERGE-IFS-NEXT: IfsVersion: 1.0 +# CHECK-MERGE-IFS: --- !experimental-ifs-v2 +# CHECK-MERGE-IFS-NEXT: IfsVersion: 2.0 # CHECK-MERGE-IFS-NEXT: Triple: x86_64-unknown-linux-gnu # CHECK-MERGE-IFS-NEXT: ObjectFileFormat: ELF # CHECK-MERGE-IFS-NEXT: Symbols: -# CHECK-MERGE-IFS-NEXT: a: { Type: Func } +# CHECK-MERGE-IFS-NEXT: - { Name: a, Type: Func } # CHECK-MERGE-IFS-NEXT: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - a: { Type: Func } + - { Name: a, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/ios-tbd.ifs b/llvm/test/tools/llvm-ifs/ios-tbd.ifs index 13671b02c5cb..5b21aedf6500 100644 --- a/llvm/test/tools/llvm-ifs/ios-tbd.ifs +++ b/llvm/test/tools/llvm-ifs/ios-tbd.ifs @@ -13,10 +13,10 @@ # CHECK-NEXT: symbols: [ __Z3fooi ] # CHECK-NEXT: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: arm64-apple-ios ObjectFileFormat: TBD Symbols: - __Z3fooi: { Type: Func } + - { Name: __Z3fooi, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/macos-tbd.ifs b/llvm/test/tools/llvm-ifs/macos-tbd.ifs index bd84806fb219..b04828b2a39d 100644 --- a/llvm/test/tools/llvm-ifs/macos-tbd.ifs +++ b/llvm/test/tools/llvm-ifs/macos-tbd.ifs @@ -13,10 +13,10 @@ # CHECK-NEXT: symbols: [ __Z3fooi ] # CHECK-NEXT: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: arm64-apple-macosx ObjectFileFormat: TBD Symbols: - __Z3fooi: { Type: Func } + - { Name: __Z3fooi, Type: Func } ... 
diff --git a/llvm/test/tools/llvm-ifs/object-function-size-weak-combo.ifs b/llvm/test/tools/llvm-ifs/object-function-size-weak-combo.ifs index b6328fbc58d9..769f423f328a 100644 --- a/llvm/test/tools/llvm-ifs/object-function-size-weak-combo.ifs +++ b/llvm/test/tools/llvm-ifs/object-function-size-weak-combo.ifs @@ -4,17 +4,17 @@ # RUN: llvm-ifs -action write-bin -o - %s %S/func.ifs %S/object.ifs %S/weak.ifs | \ # RUN: llvm-readelf --all | FileCheck %s --check-prefixes=CHECK-ELF -# CHECK-IFS: --- !experimental-ifs-v1 -# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS: --- !experimental-ifs-v2 +# CHECK-IFS-NEXT: IfsVersion: 2.0 # CHECK-IFS-NEXT: Triple: x86_64-unknown-linux-gnu # CHECK-IFS-NEXT: ObjectFileFormat: ELF # CHECK-IFS-NEXT: Symbols: -# CHECK-IFS-DAG: e: { Type: Object, Size: 8 } -# CHECK-IFS-DAG: a: { Type: Func } -# CHECK-IFS-DAG: f: { Type: Object, Size: 2 } -# CHECK-IFS-DAG: _Z10strongFuncv: { Type: Func } -# CHECK-IFS-DAG: _Z8weakFuncv: { Type: Func, Weak: true } -# CHECK-IFS-DAG: b: { Type: Object, Size: 4 } +# CHECK-IFS-DAG: - { Name: e, Type: Object, Size: 8 } +# CHECK-IFS-DAG: - { Name: a, Type: Func } +# CHECK-IFS-DAG: - { Name: f, Type: Object, Size: 2 } +# CHECK-IFS-DAG: - { Name: _Z10strongFuncv, Type: Func } +# CHECK-IFS-DAG: - { Name: _Z8weakFuncv, Type: Func, Weak: true } +# CHECK-IFS-DAG: - { Name: b, Type: Object, Size: 4 } # CHECK-IFS: ... # CHECK-ELF: FUNC GLOBAL DEFAULT 1 _Z10strongFuncv @@ -24,11 +24,11 @@ # CHECK-ELF: OBJECT GLOBAL DEFAULT 1 e # CHECK-ELF: OBJECT GLOBAL DEFAULT 1 f ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - e: { Type: Object, Size: 8 } - f: { Type: Object, Size: 2 } + - { Name: e, Type: Object, Size: 8 } + - { Name: f, Type: Object, Size: 2 } ... diff --git a/llvm/test/tools/llvm-ifs/object.ifs b/llvm/test/tools/llvm-ifs/object.ifs index 733cc38001d3..c4823c20fce2 100644 --- a/llvm/test/tools/llvm-ifs/object.ifs +++ b/llvm/test/tools/llvm-ifs/object.ifs @@ -4,12 +4,12 @@ # RUN: llvm-ifs -action write-bin -o - %s | \ # RUN: llvm-readelf --all | FileCheck %s --check-prefixes=CHECK-ELF -# CHECK-IFS: --- !experimental-ifs-v1 -# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS: --- !experimental-ifs-v2 +# CHECK-IFS-NEXT: IfsVersion: 2.0 # CHECK-IFS-NEXT: Triple: x86_64-unknown-linux-gnu # CHECK-IFS-NEXT: ObjectFileFormat: ELF # CHECK-IFS-NEXT: Symbols: -# CHECK-IFS-NEXT: b: { Type: Object, Size: 4 } +# CHECK-IFS-NEXT: - { Name: b, Type: Object, Size: 4 } # CHECK-IFS-NEXT: ... # CHECK-ELF: ELF Header: @@ -19,10 +19,10 @@ # CHECK-ELF-NOT: FUNC GLOBAL DEFAULT 1 a # CHECK-ELF: OBJECT GLOBAL DEFAULT 1 b ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - b: { Type: Object, Size: 4 } + - { Name: b, Type: Object, Size: 4 } ... 
diff --git a/llvm/test/tools/llvm-ifs/strong.ifs b/llvm/test/tools/llvm-ifs/strong.ifs index bdc930fbaaa3..ccc1f9e5d8b6 100644 --- a/llvm/test/tools/llvm-ifs/strong.ifs +++ b/llvm/test/tools/llvm-ifs/strong.ifs @@ -1,17 +1,17 @@ # RUN: llvm-ifs -action write-ifs -o - %s %S/strong.ifs | FileCheck %s --check-prefixes=CHECK-IFS -# CHECK-IFS: --- !experimental-ifs-v1 -# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS: --- !experimental-ifs-v2 +# CHECK-IFS-NEXT: IfsVersion: 2.0 # CHECK-IFS-NEXT: Triple: x86_64-unknown-linux-gnu # CHECK-IFS-NEXT: ObjectFileFormat: ELF # CHECK-IFS-NEXT: Symbols: -# CHECK-IFS-DAG: _Z8weakFuncv: { Type: Func } +# CHECK-IFS-DAG: - { Name: _Z8weakFuncv, Type: Func } # CHECK-IFS: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - _Z8weakFuncv: { Type: Func } + - { Name: _Z8weakFuncv, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/tvos-tbd.ifs b/llvm/test/tools/llvm-ifs/tvos-tbd.ifs index 08c8478c1daf..6db01bf6162f 100644 --- a/llvm/test/tools/llvm-ifs/tvos-tbd.ifs +++ b/llvm/test/tools/llvm-ifs/tvos-tbd.ifs @@ -13,10 +13,10 @@ # CHECK-NEXT: symbols: [ __Z3fooi ] # CHECK-NEXT: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: arm64-apple-tvos ObjectFileFormat: TBD Symbols: - __Z3fooi: { Type: Func } + - { Name: __Z3fooi, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/version-ok.ifs b/llvm/test/tools/llvm-ifs/version-ok.ifs index fd150ee77d55..646b8624feb1 100644 --- a/llvm/test/tools/llvm-ifs/version-ok.ifs +++ b/llvm/test/tools/llvm-ifs/version-ok.ifs @@ -1,9 +1,9 @@ # RUN: llvm-ifs -action write-ifs -o - %s %S/object.ifs ---- !experimental-ifs-v1 -IfsVersion: 1.1 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - a: { Type: Func } + - { Name: a, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/watchos-tbd.ifs b/llvm/test/tools/llvm-ifs/watchos-tbd.ifs index 74a9d962a3e0..fcb914265202 100644 --- a/llvm/test/tools/llvm-ifs/watchos-tbd.ifs +++ b/llvm/test/tools/llvm-ifs/watchos-tbd.ifs @@ -13,10 +13,10 @@ # CHECK-NEXT: symbols: [ __Z3fooi ] # CHECK-NEXT: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: arm64-apple-watchos ObjectFileFormat: TBD Symbols: - __Z3fooi: { Type: Func } + - { Name: __Z3fooi, Type: Func } ... diff --git a/llvm/test/tools/llvm-ifs/weak-mismatch.ifs b/llvm/test/tools/llvm-ifs/weak-mismatch.ifs index 15abc2064cc2..cf45dff8c062 100644 --- a/llvm/test/tools/llvm-ifs/weak-mismatch.ifs +++ b/llvm/test/tools/llvm-ifs/weak-mismatch.ifs @@ -10,10 +10,10 @@ # CHECK-TYPE-NEXT: Filename: # CHECK-TYPE-NEXT: Type Values: Object Func ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - foobar: { Type: Object, Size: 1, Weak: true } + - { Name: foobar, Type: Object, Size: 1, Weak: true } ... 
diff --git a/llvm/test/tools/llvm-ifs/weak.ifs b/llvm/test/tools/llvm-ifs/weak.ifs index a7441be1c1f2..bf8091050530 100644 --- a/llvm/test/tools/llvm-ifs/weak.ifs +++ b/llvm/test/tools/llvm-ifs/weak.ifs @@ -1,19 +1,19 @@ # RUN: llvm-ifs -action write-ifs -o - %s | FileCheck %s --check-prefixes=CHECK-IFS -# CHECK-IFS: --- !experimental-ifs-v1 -# CHECK-IFS-NEXT: IfsVersion: 1.0 +# CHECK-IFS: --- !experimental-ifs-v2 +# CHECK-IFS-NEXT: IfsVersion: 2.0 # CHECK-IFS-NEXT: Triple: x86_64-unknown-linux-gnu # CHECK-IFS-NEXT: ObjectFileFormat: ELF # CHECK-IFS-NEXT: Symbols: -# CHECK-IFS-DAG: _Z8weakFuncv: { Type: Func, Weak: true } -# CHECK-IFS-DAG: _Z10strongFuncv: { Type: Func } +# CHECK-IFS-DAG: - { Name: _Z8weakFuncv, Type: Func, Weak: true } +# CHECK-IFS-DAG: - { Name: _Z10strongFuncv, Type: Func } # CHECK-IFS: ... ---- !experimental-ifs-v1 -IfsVersion: 1.0 +--- !experimental-ifs-v2 +IfsVersion: 2.0 Triple: x86_64-unknown-linux-gnu ObjectFileFormat: ELF Symbols: - _Z8weakFuncv: { Type: Func, Weak: true } - _Z10strongFuncv: { Type: Func } + - { Name: _Z8weakFuncv, Type: Func, Weak: true } + - { Name: _Z10strongFuncv, Type: Func } ... diff --git a/llvm/test/tools/llvm-objdump/ELF/PowerPC/branch-offset.s b/llvm/test/tools/llvm-objdump/ELF/PowerPC/branch-offset.s index 73e3a68c6d9c..f04b7e5c0776 100644 --- a/llvm/test/tools/llvm-objdump/ELF/PowerPC/branch-offset.s +++ b/llvm/test/tools/llvm-objdump/ELF/PowerPC/branch-offset.s @@ -29,7 +29,9 @@ b: b .+4 # CHECK-LABEL: : -# CHECK-NEXT: bt 2, .+65532 +# CHECK-NEXT: 18: bt 2, 0x14 +# CHECK-NEXT: 1c: bt 1, 0x20 bt: bt 2, .-4 + bgt .+4 diff --git a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-all.test b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-all.test index 5a229caeb482..1dee2aa2d52a 100644 --- a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-all.test +++ b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-all.test @@ -55,7 +55,7 @@ CHECK: 000000a4 : CHECK-NEXT: ... CHECK: Disassembly of section .tdata: CHECK: 00000000 : -CHECK-NEXT: 0: 40 09 21 f9 bdnzfl 9, .+8696 +CHECK-NEXT: 0: 40 09 21 f9 bdnzfl 9, 0x21f8 CHECK-NEXT: 4: f0 1b 86 6e CHECK: Disassembly of section .tbss: CHECK: 00000008 : diff --git a/llvm/test/tools/llvm-readobj/ELF/RISCV/attribute.s b/llvm/test/tools/llvm-readobj/ELF/RISCV/attribute.s new file mode 100644 index 000000000000..5ae7bfcb0290 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/RISCV/attribute.s @@ -0,0 +1,44 @@ +## Test llvm-readobj & llvm-readelf can decode RISC-V attributes correctly. 
+ +# RUN: llvm-mc -triple riscv32 -filetype obj -o %t.rv32.o %s +# RUN: llvm-mc -triple riscv64 -filetype obj -o %t.rv64.o %s +# RUN: llvm-readobj --arch-specific %t.rv32.o \ +# RUN: | FileCheck %s --check-prefix=CHECK-OBJ +# RUN: llvm-readelf -A %t.rv32.o \ +# RUN: | FileCheck %s --check-prefix=CHECK-OBJ +# RUN: llvm-readobj --arch-specific %t.rv64.o \ +# RUN: | FileCheck %s --check-prefix=CHECK-OBJ +# RUN: llvm-readelf -A %t.rv64.o \ +# RUN: | FileCheck %s --check-prefix=CHECK-OBJ + +.attribute Tag_stack_align, 16 +# CHECK-OBJ: Tag: 4 +# CHECK-OBJ-NEXT: Value: 16 +# CHECK-OBJ-NEXT: TagName: stack_align +# CHECK-OBJ-NEXT: Description: Stack alignment is 16-bytes + +.attribute Tag_arch, "rv32i2p0_m2p0_a2p0_c2p0" +# CHECK-OBJ: Tag: 5 +# CHECK-OBJ-NEXT: TagName: arch +# CHECK-OBJ-NEXT: Value: rv32i2p0_m2p0_a2p0_c2p0 + +.attribute Tag_unaligned_access, 0 +# CHECK-OBJ: Tag: 6 +# CHECK-OBJ-NEXT: Value: 0 +# CHECK-OBJ-NEXT: TagName: unaligned_access +# CHECK-OBJ-NEXT: Description: No unaligned access + +.attribute Tag_priv_spec, 2 +# CHECK-OBJ: Tag: 8 +# CHECK-OBJ-NEXT: TagName: priv_spec +# CHECK-OBJ-NEXT: Value: 2 + +.attribute Tag_priv_spec_minor, 0 +# CHECK-OBJ: Tag: 10 +# CHECK-OBJ-NEXT: TagName: priv_spec_minor +# CHECK-OBJ-NEXT: Value: 0 + +.attribute Tag_priv_spec_revision, 0 +# CHECK-OBJ: Tag: 12 +# CHECK-OBJ-NEXT: TagName: priv_spec_revision +# CHECK-OBJ-NEXT: Value: 0 diff --git a/llvm/test/tools/llvm-readobj/ELF/RISCV/invalid-attr-section-size.test b/llvm/test/tools/llvm-readobj/ELF/RISCV/invalid-attr-section-size.test new file mode 100644 index 000000000000..524134e1579b --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/RISCV/invalid-attr-section-size.test @@ -0,0 +1,20 @@ +## This test case is used to ensure the error code is caught by llvm-readobj. + +# RUN: yaml2obj %s -D BITS=32 -o %t.32.o +# RUN: llvm-readobj -A %t.32.o 2>&1 | FileCheck -DFILE=%t %s +# RUN: yaml2obj %s -D BITS=64 -o %t.64.o +# RUN: llvm-readobj -A %t.64.o 2>&1 | FileCheck -DFILE=%t %s + +# CHECK: warning: '[[FILE]].{{32|64}}.o': invalid section length 0 at offset 0x1 + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS]] + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_RISCV +Sections: + - Name: .riscv.attributes + Type: SHT_RISCV_ATTRIBUTES +## Version: 'A'(0x41), section length: 0 + Content: 4100000000 diff --git a/llvm/test/tools/llvm-readobj/ELF/RISCV/invalid-attr-version.test b/llvm/test/tools/llvm-readobj/ELF/RISCV/invalid-attr-version.test new file mode 100644 index 000000000000..9a4d81bcc4f1 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/RISCV/invalid-attr-version.test @@ -0,0 +1,21 @@ +## This test case is used to ensure llvm-readobj checks the version of +## attribute sections correctly. 
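Both RISC-V attribute error tests synthesize raw `.riscv.attributes` contents, so it helps to keep the expected layout in mind: a one-byte format-version ('A', 0x41) followed by a sub-section whose leading 32-bit little-endian length field counts itself. A hypothetical decoder for just that header (the names and the `std::optional` interface are mine, not the parser's API):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <optional>

struct AttrHeader {
  uint8_t FormatVersion;  // must be 'A' (0x41); a 0x42 byte trips the version check
  uint32_t SectionLength; // little-endian, counts its own 4 bytes; 0 is invalid
};

// Hypothetical helper mirroring the two error paths these tests exercise.
std::optional<AttrHeader> readAttrHeader(const uint8_t *Data, size_t Size) {
  if (Size < 1 || Data[0] != 0x41)
    return std::nullopt; // "unrecognised FormatVersion: 0x..."
  if (Size < 1 + sizeof(uint32_t))
    return std::nullopt; // no room for the length field
  uint32_t Len;
  std::memcpy(&Len, Data + 1, sizeof(Len)); // assumes a little-endian host
  if (Len < sizeof(Len) || Len > Size - 1)
    return std::nullopt; // "invalid section length ... at offset 0x1"
  return AttrHeader{Data[0], Len};
}
```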
+ +# RUN: yaml2obj %s -D BITS=32 -o %t.32.o +# RUN: llvm-readobj -A %t.32.o 2>&1 | FileCheck -DFILE=%t %s +# RUN: yaml2obj %s -D BITS=64 -o %t.64.o +# RUN: llvm-readobj -A %t.64.o 2>&1 | FileCheck -DFILE=%t %s + +# CHECK: warning: '[[FILE]].{{32|64}}.o': unrecognised FormatVersion: 0x42 + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS]] + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_RISCV +Sections: + - Name: .riscv.attributes + Type: SHT_RISCV_ATTRIBUTES +## Version: 'B' + Content: 42 diff --git a/llvm/test/tools/llvm-readobj/ELF/RISCV/lit.local.cfg b/llvm/test/tools/llvm-readobj/ELF/RISCV/lit.local.cfg new file mode 100644 index 000000000000..c63820126f8c --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/RISCV/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'RISCV' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/tools/llvm-readobj/ELF/RISCV/section-types.test b/llvm/test/tools/llvm-readobj/ELF/RISCV/section-types.test new file mode 100644 index 000000000000..d1f35306afb2 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/RISCV/section-types.test @@ -0,0 +1,21 @@ +## Show that all RISCV specific section types are properly printed for both +## LLVM and GNU styles. + +# RUN: yaml2obj %s -o %t-riscv.o +# RUN: llvm-readobj --section-headers %t-riscv.o | FileCheck %s --check-prefix=LLVM +# RUN: llvm-readelf --section-headers %t-riscv.o | FileCheck %s --check-prefix=GNU + +# LLVM: Name: .riscv.attributes (1) +# LLVM-NEXT: Type: SHT_RISCV_ATTRIBUTES (0x70000003) + +# GNU: [ 1] .riscv.attributes RISCV_ATTRIBUTES + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_RISCV +Sections: + - Name: .riscv.attributes + Type: SHT_RISCV_ATTRIBUTES diff --git a/llvm/test/tools/llvm-readobj/ELF/RISCV/validate-attr-section.test b/llvm/test/tools/llvm-readobj/ELF/RISCV/validate-attr-section.test new file mode 100644 index 000000000000..66a5a7a8d31f --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/RISCV/validate-attr-section.test @@ -0,0 +1,17 @@ +## We only implement attribute section printing for little-endian encoding. + +# RUN: yaml2obj %s -o %t.o +# RUN: llvm-readobj -A %t.o | FileCheck %s + +# CHECK: Attributes not implemented. + +--- !ELF +FileHeader: + Class: ELFCLASS64 +## Test big-endian encoding. + Data: ELFDATA2MSB + Type: ET_REL + Machine: EM_RISCV +Sections: + - Name: .riscv.attributes + Type: SHT_RISCV_ATTRIBUTES diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test b/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test index cdbec32efa24..f7551b481a86 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test @@ -112,3 +112,98 @@ ProgramHeaders: Sections: - Section: .hash - Section: .dynamic + +## Each SHT_HASH section starts with two 32-bit fields: nbucket and nchain. +## Check we report an error when a DT_HASH value points to data that has size less than 8 bytes. 
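All of the in-bounds/out-of-bounds pairs that follow are built around one computation, which the new `checkHashTable` in ELFDumper.cpp (further down in this patch) performs in 64-bit arithmetic so `nbucket + nchain` cannot overflow. A standalone sketch of the check, assuming 32-bit hash words:

```cpp
#include <cstdint>

// The table needs 8 bytes for the nbucket/nchain header plus one 32-bit
// word per bucket and per chain entry, all inside the file buffer.
// E.g. for the cases below: file size 0x1d4 with the table at 0x54 leaves
// room for exactly 94 words, so nbucket=0x5d/nchain=0x1 fits and
// nbucket=0x5e/nchain=0x1 runs past the EOF.
bool hashTableFits(uint64_t BufSize, uint64_t SecOffset,
                   uint32_t NBucket, uint32_t NChain) {
  const uint64_t HeaderSize = 2 * sizeof(uint32_t); // nbucket + nchain
  if (BufSize < SecOffset || BufSize - SecOffset < HeaderSize)
    return false; // covers the DT_HASH-near-EOF case above
  return BufSize - SecOffset - HeaderSize >=
         (uint64_t(NBucket) + NChain) * sizeof(uint32_t);
}
```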
+ +# RUN: yaml2obj --docnum=3 %s -o %t3.o +# RUN: llvm-readelf --elf-hash-histogram %t3.o 2>&1 | FileCheck %s --check-prefix=ERR1 -DFILE=%t3.o + +# ERR1: warning: '[[FILE]]': the hash table at offset 0x2b1 goes past the end of the file (0x2b8){{$}} + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Bucket: [ 0 ] + Chain: [ 0 ] + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Entries: + - Tag: DT_HASH + Value: 0x239 + - Tag: DT_NULL + Value: 0x0 +DynamicSymbols: [] +ProgramHeaders: + - Type: PT_LOAD + FileSize: 0x23a + Sections: + - Section: .hash + - Section: .dynamic + +## Check we report a warning when the hash table goes past the end of the file. + +## Case A.1: the hash table ends right before the EOF. We have a broken nbucket +## field that has a value larger than the number of buckets. +# RUN: yaml2obj --docnum=4 %s -o %t4.1.o -DNBUCKET=0x5d -DNCHAIN=0x1 +# RUN: llvm-readelf --elf-hash-histogram %t4.1.o 2>&1 | \ +# RUN: FileCheck %s --implicit-check-not={{.}} --allow-empty + +## Case A.2: the hash table ends 1 byte past the EOF. We have a broken nbucket +## field that has a value larger than the number of buckets. +# RUN: yaml2obj --docnum=4 %s -o %t4.2.o -DNBUCKET=0x5e -DNCHAIN=0x1 +# RUN: llvm-readelf --elf-hash-histogram %t4.2.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR2 -DFILE=%t4.2.o --implicit-check-not="warning:" +# ERR2: warning: '[[FILE]]': the hash table at offset 0x54 goes past the end of the file (0x1d4), nbucket = 94, nchain = 1{{$}} + +## Case B.1: the hash table ends right before the EOF. We have a broken nchain +## field that has a value larger than the number of chains. +# RUN: yaml2obj --docnum=4 %s -o %t4.3.o -DNBUCKET=0x1 -DNCHAIN=0x5d +# RUN: llvm-readelf --elf-hash-histogram %t4.3.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR3 -DFILE=%t4.3.o --implicit-check-not="warning:" +# ERR3: warning: '[[FILE]]': hash table nchain (93) differs from symbol count derived from SHT_DYNSYM section header (1){{$}} + +## Case B.2: the hash table ends 1 byte past the EOF. We have a broken nchain +## field that has a value larger than the number of chains. 
+# RUN: yaml2obj --docnum=4 %s -o %t4.4.o -DNBUCKET=0x1 -DNCHAIN=0x5e +# RUN: llvm-readelf --elf-hash-histogram %t4.4.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR4 -DFILE=%t4.4.o --implicit-check-not="warning:" +# ERR4: warning: '[[FILE]]': hash table nchain (94) differs from symbol count derived from SHT_DYNSYM section header (1){{$}} +# ERR4: warning: '[[FILE]]': the hash table at offset 0x54 goes past the end of the file (0x1d4), nbucket = 1, nchain = 94{{$}} + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Bucket: [ 0 ] + NBucket: [[NBUCKET]] + Chain: [ 0 ] + NChain: [[NCHAIN]] + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Entries: + - Tag: DT_HASH + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 +DynamicSymbols: [] +ProgramHeaders: + - Type: PT_LOAD + Sections: + - Section: .hash + - Section: .dynamic diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test b/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test index 9434da1882a8..40e680e3b751 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-symbols.test @@ -347,3 +347,104 @@ ProgramHeaders: Sections: - Section: .hash - Section: .dynamic + +## Each SHT_HASH section starts with two 32-bit fields: nbucket and nchain. +## Check we report an error when a DT_HASH value points to data that has size less than 8 bytes. + +# RUN: yaml2obj --docnum=6 %s -o %t6.o +# RUN: llvm-readelf --hash-symbols %t6.o 2>&1 | FileCheck %s --check-prefix=ERR1 -DFILE=%t6.o + +# ERR1: warning: '[[FILE]]': the hash table at offset 0x2b1 goes past the end of the file (0x2b8){{$}} + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Bucket: [ 0 ] + Chain: [ 0 ] + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Entries: + - Tag: DT_HASH + Value: 0x239 + - Tag: DT_NULL + Value: 0x0 +DynamicSymbols: [] +ProgramHeaders: + - Type: PT_LOAD + FileSize: 0x23a + Sections: + - Section: .hash + - Section: .dynamic + +## Check we report a warning when the hash table goes past the end of the file. + +## Case A.1: the hash table ends right before the EOF. We have a broken nbucket +## field that has a value larger than the number of buckets. +# RUN: yaml2obj --docnum=7 %s -o %t7.1.o -DNBUCKET=0x5d -DNCHAIN=0x1 +# RUN: llvm-readelf --hash-symbols %t7.1.o 2>&1 | FileCheck %s --check-prefix=NOERR1 +# NOERR1: Symbol table of .hash for image: +# NOERR1-NEXT: Num Buc: Value Size Type Bind Vis Ndx Name +# NOERR1-EMPTY: + +## Case A.2: the hash table ends 1 byte past the EOF. We have a broken nbucket +## field that has a value larger than the number of buckets. +# RUN: yaml2obj --docnum=7 %s -o %t7.2.o -DNBUCKET=0x5e -DNCHAIN=0x1 +# RUN: llvm-readelf --hash-symbols %t7.2.o 2>&1 | FileCheck %s --check-prefix=ERR2 -DFILE=%t7.2.o +# ERR2: Symbol table of .hash for image: +# ERR2-NEXT: warning: '[[FILE]]': the hash table at offset 0x54 goes past the end of the file (0x1d4), nbucket = 94, nchain = 1{{$}} +# ERR2-NOT: {{.}} + +## Case B.1: the hash table ends right before the EOF. We have a broken nchain +## field that has a value larger than the number of chains.
+# RUN: yaml2obj --docnum=7 %s -o %t7.3.o -DNBUCKET=0x1 -DNCHAIN=0x5d +# RUN: llvm-readelf --hash-symbols %t7.3.o 2>&1 | \ +# RUN: FileCheck %s --implicit-check-not="warning:" --check-prefix=NOERR2 -DFILE=%t7.3.o +# NOERR2: warning: '[[FILE]]': hash table nchain (93) differs from symbol count derived from SHT_DYNSYM section header (1) +# NOERR2: Symbol table of .hash for image: +# NOERR2-NEXT: Num Buc: Value Size Type Bind Vis Ndx Name +# NOERR2-NOT: {{.}} + +## Case B.2: the hash table ends 1 byte past the EOF. We have a broken nchain +## field that has a value larger than the number of chains. +# RUN: yaml2obj --docnum=7 %s -o %t7.4.o -DNBUCKET=0x1 -DNCHAIN=0x5e +# RUN: llvm-readelf --hash-symbols %t7.4.o 2>&1 | FileCheck %s --check-prefix=ERR3 -DFILE=%t7.4.o +# ERR3: Symbol table of .hash for image: +# ERR3-NEXT: warning: '[[FILE]]': the hash table at offset 0x54 goes past the end of the file (0x1d4), nbucket = 1, nchain = 94{{$}} +# ERR3-NOT: {{.}} + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Bucket: [ 0 ] + NBucket: [[NBUCKET]] + Chain: [ 0 ] + NChain: [[NCHAIN]] + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Entries: + - Tag: DT_HASH + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 +DynamicSymbols: [] +ProgramHeaders: + - Type: PT_LOAD + Sections: + - Section: .hash + - Section: .dynamic diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-table.test b/llvm/test/tools/llvm-readobj/ELF/hash-table.test index 2abfdd01baf2..8cbe615eee22 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-table.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-table.test @@ -115,3 +115,133 @@ ProgramHeaders: VAddr: 0x1010 Sections: - Section: .dynamic + +## Each SHT_HASH section starts with two 32-bit fields: nbucket and nchain. +## Check we report an error when a DT_HASH value points to data that has size less than 8 bytes. + +# RUN: yaml2obj --docnum=4 %s -o %t4.o +# RUN: llvm-readelf --hash-table %t4.o 2>&1 | FileCheck %s --check-prefix=ERR1 -DFILE=%t4.o +# RUN: llvm-readobj --hash-table %t4.o 2>&1 | FileCheck %s --check-prefix=ERR1 -DFILE=%t4.o + +# ERR1: HashTable { +# ERR1-NEXT: warning: '[[FILE]]': the hash table at offset 0x2b1 goes past the end of the file (0x2b8){{$}} +# ERR1-NEXT: } + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Bucket: [ 0 ] + Chain: [ 0 ] + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Entries: + - Tag: DT_HASH + Value: 0x239 + - Tag: DT_NULL + Value: 0x0 +DynamicSymbols: [] +ProgramHeaders: + - Type: PT_LOAD + FileSize: 0x23a + Sections: + - Section: .hash + - Section: .dynamic + +## Check we report a warning when the hash table goes past the end of the file. + +## Case A.1: the hash table ends right before the EOF. We have a broken nbucket +## field that has a value larger than the number of buckets. 
+# RUN: yaml2obj --docnum=5 %s -o %t5.1.o -DNBUCKET=0x5d -DNCHAIN=0x1 +# RUN: llvm-readelf --hash-table %t5.1.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=NOERR1 --implicit-check-not="warning:" +# RUN: llvm-readobj --hash-table %t5.1.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=NOERR1 --implicit-check-not="warning:" + +# NOERR1: HashTable { +# NOERR1-NEXT: Num Buckets: 93 +# NOERR1-NEXT: Num Chains: 1 +## Here we would dump the rest of the file as buckets array because we have a broken nbucket field. +## No need to check what we dump, we only want to test that we have no unexpected warnings/crashes. +# NOERR1-NEXT: Buckets: +# NOERR1-NEXT: Chains: [0] +# NOERR1-NEXT: } + +## Case A.2: the hash table ends 1 byte past the EOF. We have a broken nbucket +## field that has a value larger than the number of buckets. +# RUN: yaml2obj --docnum=5 %s -o %t5.2.o -DNBUCKET=0x5e -DNCHAIN=0x1 +# RUN: llvm-readelf --hash-table %t5.2.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR2 -DFILE=%t5.2.o --implicit-check-not="warning:" +# RUN: llvm-readobj --hash-table %t5.2.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR2 -DFILE=%t5.2.o --implicit-check-not="warning:" + +# ERR2: HashTable { +# ERR2-NEXT: warning: '[[FILE]]': the hash table at offset 0x54 goes past the end of the file (0x1d4), nbucket = 94, nchain = 1{{$}} +# ERR2-NEXT: } + +## Case B.1: the hash table ends right before the EOF. We have a broken nchain +## field that has a value larger than the number of chains. +# RUN: yaml2obj --docnum=5 %s -o %t5.3.o -DNBUCKET=0x1 -DNCHAIN=0x5d +# RUN: llvm-readelf --hash-table %t5.3.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=NOERR2 -DFILE=%t5.3.o --implicit-check-not="warning:" +# RUN: llvm-readobj --hash-table %t5.3.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=NOERR2 -DFILE=%t5.3.o --implicit-check-not="warning:" + +# NOERR2: warning: '[[FILE]]': hash table nchain (93) differs from symbol count derived from SHT_DYNSYM section header (1) +# NOERR2: HashTable { +# NOERR2-NEXT: Num Buckets: 1 +# NOERR2-NEXT: Num Chains: 93 +# NOERR2-NEXT: Buckets: [0] +## Here we would dump the rest of the file as chain array because we have a broken nchain field. +## No need to check what we dump, we only want to test that we have no unexpected warnings/crashes. +# NOERR2-NEXT: Chains: +# NOERR2-NEXT: } + +## Case B.2: the hash table ends 1 byte past the EOF. We have a broken nchain +## field that has a value larger than the number of chains. 
+# RUN: yaml2obj --docnum=5 %s -o %t5.4.o +# RUN: llvm-readelf --hash-table %t5.4.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR3 -DFILE=%t5.4.o --implicit-check-not="warning:" +# RUN: llvm-readobj --hash-table %t5.4.o 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR3 -DFILE=%t5.4.o --implicit-check-not="warning:" + +# ERR3: warning: '[[FILE]]': hash table nchain (94) differs from symbol count derived from SHT_DYNSYM section header (1) +# ERR3: HashTable { +# ERR3-NEXT: warning: '[[FILE]]': the hash table at offset 0x54 goes past the end of the file (0x1d4), nbucket = 1, nchain = 94{{$}} +# ERR3-NEXT: } + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .hash + Type: SHT_HASH + Flags: [ SHF_ALLOC ] + Bucket: [ 0 ] + NBucket: [[NBUCKET]] + Chain: [ 0 ] + NChain: [[NCHAIN]] + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Entries: + - Tag: DT_HASH + Value: 0x0 + - Tag: DT_NULL + Value: 0x0 +DynamicSymbols: [] +ProgramHeaders: + - Type: PT_LOAD + Sections: + - Section: .hash + - Section: .dynamic diff --git a/llvm/test/tools/yaml2obj/ELF/hash-section.yaml b/llvm/test/tools/yaml2obj/ELF/hash-section.yaml index 4aad9c11fd59..2274e4c9a7c9 100644 --- a/llvm/test/tools/yaml2obj/ELF/hash-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/hash-section.yaml @@ -276,3 +276,39 @@ Sections: Type: SHT_HASH Size: 0x1 Chain: [ 1 ] + +## Check we can override "nbucket" and "nchain" values of a SHT_HASH section using "NBucket" +## and "NChain" tags. Check that the section size is unaffected when we do this. + +# RUN: yaml2obj --docnum=14 %s -o %t14 +# RUN: llvm-readobj --sections --section-data %t14 | FileCheck %s --check-prefix=OVERRIDE + +# OVERRIDE: Name: .hash +# OVERRIDE-NEXT: Type: SHT_HASH +# OVERRIDE-NEXT: Flags [ +# OVERRIDE-NEXT: ] +# OVERRIDE-NEXT: Address: 0x0 +# OVERRIDE-NEXT: Offset: 0x34 +# OVERRIDE-NEXT: Size: 28 +# OVERRIDE-NEXT: Link: 0 +# OVERRIDE-NEXT: Info: 0 +# OVERRIDE-NEXT: AddressAlignment: 0 +# OVERRIDE-NEXT: EntrySize: 0 +# OVERRIDE-NEXT: SectionData ( +# OVERRIDE-NEXT: 0000: AA000000 BB000000 01000000 02000000 +# OVERRIDE-NEXT: 0010: 03000000 04000000 05000000 +# OVERRIDE-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_386 +Sections: + - Name: .hash + Type: SHT_HASH + Bucket: [ 1, 2 ] + Chain: [ 3, 4, 5 ] + NBucket: 0xAA + NChain: 0xBB diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp index 3b0d2ee725ff..0d1a7518dad3 100644 --- a/llvm/tools/llvm-ifs/llvm-ifs.cpp +++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp @@ -26,6 +26,7 @@ #include "llvm/TextAPI/MachO/TextAPIWriter.h" #include <set> #include <string> +#include <vector> using namespace llvm; using namespace llvm::yaml; @@ -34,8 +35,8 @@ using namespace llvm::MachO; #define DEBUG_TYPE "llvm-ifs" namespace { -const VersionTuple IFSVersionCurrent(1, 2); -} +const VersionTuple IFSVersionCurrent(2, 0); +} // end anonymous namespace static cl::opt<std::string> Action("action", cl::desc("<llvm-ifs action>"), cl::value_desc("write-ifs | write-bin"), @@ -76,6 +77,7 @@ std::string getTypeName(IFSSymbolType Type) { } struct IFSSymbol { + IFSSymbol() = default; IFSSymbol(std::string SymbolName) : Name(SymbolName) {} std::string Name; uint64_t Size; @@ -85,6 +87,8 @@ struct IFSSymbol { bool operator<(const IFSSymbol &RHS) const { return Name < RHS.Name; } }; +LLVM_YAML_IS_SEQUENCE_VECTOR(IFSSymbol) + namespace llvm { namespace yaml { /// YAML traits for IFSSymbolType.
@@ -124,6 +128,7 @@ template <> struct ScalarTraits<VersionTuple> { /// YAML traits for IFSSymbol. template <> struct MappingTraits<IFSSymbol> { static void mapping(IO &IO, IFSSymbol &Symbol) { + IO.mapRequired("Name", Symbol.Name); IO.mapRequired("Type", Symbol.Type); // The need for symbol size depends on the symbol type. if (Symbol.Type == IFSSymbolType::NoType) @@ -140,20 +145,6 @@ template <> struct MappingTraits<IFSSymbol> { static const bool flow = true; }; -/// YAML traits for set of IFSSymbols. -template <> struct CustomMappingTraits<std::set<IFSSymbol>> { - static void inputOne(IO &IO, StringRef Key, std::set<IFSSymbol> &Set) { - std::string Name = Key.str(); - IFSSymbol Sym(Name); - IO.mapRequired(Name.c_str(), Sym); - Set.insert(Sym); - } - - static void output(IO &IO, std::set<IFSSymbol> &Set) { - for (auto &Sym : Set) - IO.mapRequired(Sym.Name.c_str(), const_cast<IFSSymbol &>(Sym)); - } -}; } // namespace yaml } // namespace llvm @@ -167,7 +158,7 @@ class IFSStub { std::string ObjectFileFormat; Optional<std::string> SOName; std::vector<std::string> NeededLibs; - std::set<IFSSymbol> Symbols; + std::vector<IFSSymbol> Symbols; IFSStub() = default; IFSStub(const IFSStub &Stub) @@ -186,14 +177,18 @@ namespace yaml { /// YAML traits for IFSStub objects. template <> struct MappingTraits<IFSStub> { static void mapping(IO &IO, IFSStub &Stub) { - if (!IO.mapTag("!experimental-ifs-v1", true)) + if (!IO.mapTag("!experimental-ifs-v2", true)) IO.setError("Not a .ifs YAML file."); + + auto OldContext = IO.getContext(); + IO.setContext(&Stub); IO.mapRequired("IfsVersion", Stub.IfsVersion); IO.mapOptional("Triple", Stub.Triple); IO.mapOptional("ObjectFileFormat", Stub.ObjectFileFormat); IO.mapOptional("SOName", Stub.SOName); IO.mapOptional("NeededLibs", Stub.NeededLibs); IO.mapRequired("Symbols", Stub.Symbols); + IO.setContext(&OldContext); } }; } // namespace yaml @@ -218,7 +213,7 @@ static Expected<std::unique_ptr<IFSStub>> readInputFile(StringRef FilePath) { return std::move(Stub); } -int writeTbdStub(const llvm::Triple &T, const std::set<IFSSymbol> &Symbols, +int writeTbdStub(const llvm::Triple &T, const std::vector<IFSSymbol> &Symbols, const StringRef Format, raw_ostream &Out) { auto PlatformKindOrError = @@ -280,7 +275,7 @@ int writeTbdStub(const llvm::Triple &T, const std::set<IFSSymbol> &Symbols, return 0; } -int writeElfStub(const llvm::Triple &T, const std::set<IFSSymbol> &Symbols, +int writeElfStub(const llvm::Triple &T, const std::vector<IFSSymbol> &Symbols, const StringRef Format, raw_ostream &Out) { SmallString<0> Storage; Storage.clear(); @@ -387,8 +382,8 @@ int writeIfso(const IFSStub &Stub, bool IsWriteIfs, raw_ostream &Out) { // TODO: Drop ObjectFileFormat, it can be subsumed from the triple.
// New Interface Stubs Yaml Format: -// --- !experimental-ifs-v1 -// IfsVersion: 1.0 +// --- !experimental-ifs-v2 +// IfsVersion: 2.0 // Triple: // ObjectFileFormat: // Symbols: @@ -517,7 +512,7 @@ int main(int argc, char *argv[]) { } for (auto &Entry : SymbolMap) - Stub.Symbols.insert(Entry.second); + Stub.Symbols.push_back(Entry.second); std::error_code SysErr; diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 24f41ae49469..ca05f99aa715 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -52,6 +52,8 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MipsABIFlags.h" +#include "llvm/Support/RISCVAttributeParser.h" +#include "llvm/Support/RISCVAttributes.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -2605,9 +2607,35 @@ template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() { W.startLine() << L << "\n"; } +template <class ELFT> +static bool checkHashTable(const ELFFile<ELFT> *Obj, + const typename ELFT::Hash *H, StringRef FileName) { + auto WarnAndReturn = [&](uint64_t Off, const Twine &Msg = "") { + reportWarning(createError("the hash table at offset 0x" + + Twine::utohexstr(Off) + + " goes past the end of the file (0x" + + Twine::utohexstr(Obj->getBufSize()) + ")" + Msg), + FileName); + return false; + }; + + // Each SHT_HASH section starts with two 32-bit fields: nbucket and nchain. + const unsigned HeaderSize = 2 * sizeof(typename ELFT::Word); + const uint64_t SecOffset = (const uint8_t *)H - Obj->base(); + if (Obj->getBufSize() - SecOffset < HeaderSize) + return WarnAndReturn(SecOffset); + + if (Obj->getBufSize() - SecOffset - HeaderSize < + ((uint64_t)H->nbucket + H->nchain) * sizeof(typename ELFT::Word)) + return WarnAndReturn(SecOffset, ", nbucket = " + Twine(H->nbucket) + + ", nchain = " + Twine(H->nchain)); + return true; +} + template <class ELFT> void ELFDumper<ELFT>::printHashTable() { DictScope D(W, "HashTable"); - if (!HashTable) + if (!HashTable || + !checkHashTable(ObjF->getELFFile(), HashTable, ObjF->getFileName())) return; W.printNumber("Num Buckets", HashTable->nbucket); W.printNumber("Num Chains", HashTable->nchain); @@ -2678,6 +2706,7 @@ template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() { const ELFFile<ELFT> *Obj = ObjF->getELFFile(); switch (Obj->getHeader()->e_machine) { case EM_ARM: + case EM_RISCV: printAttributes(); break; case EM_MIPS: { @@ -2698,40 +2727,45 @@ template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() { } } -template <class ELFT> void ELFDumper<ELFT>::printAttributes() { - W.startLine() << "Attributes not implemented.\n"; -} - namespace { -template <> void ELFDumper<ELF32LE>::printAttributes() { - const ELFFile<ELF32LE> *Obj = ObjF->getELFFile(); - if (Obj->getHeader()->e_machine != EM_ARM) { +template <class ELFT> void ELFDumper<ELFT>::printAttributes() { + const ELFFile<ELFT> *Obj = ObjF->getELFFile(); + if (!Obj->isLE()) { W.startLine() << "Attributes not implemented.\n"; return; } + const unsigned Machine = Obj->getHeader()->e_machine; + assert((Machine == EM_ARM || Machine == EM_RISCV) && + "Attributes not implemented."); + DictScope BA(W, "BuildAttributes"); - for (const ELFO::Elf_Shdr &Sec : - unwrapOrError(ObjF->getFileName(), Obj->sections())) { - if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES) + for (const auto &Sec : unwrapOrError(ObjF->getFileName(), Obj->sections())) { + if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES && + Sec.sh_type != ELF::SHT_RISCV_ATTRIBUTES) continue; ArrayRef<uint8_t> Contents = unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(&Sec)); if (Contents[0] !=
ELFAttrs::Format_Version) { - errs() << "unrecognised FormatVersion: 0x" - << Twine::utohexstr(Contents[0]) << '\n'; + reportWarning(createError(Twine("unrecognised FormatVersion: 0x") + + Twine::utohexstr(Contents[0])), + ObjF->getFileName()); continue; } - W.printHex("FormatVersion", Contents[0]); if (Contents.size() == 1) continue; - // TODO: Print error and delete the redundant FormatVersion check above. - if (Error E = ARMAttributeParser(&W).parse(Contents, support::little)) - consumeError(std::move(E)); + // TODO: Delete the redundant FormatVersion check above. + if (Machine == EM_ARM) { + if (Error E = ARMAttributeParser(&W).parse(Contents, support::little)) + reportWarning(std::move(E), ObjF->getFileName()); + } else if (Machine == EM_RISCV) { + if (Error E = RISCVAttributeParser(&W).parse(Contents, support::little)) + reportWarning(std::move(E), ObjF->getFileName()); + } } } @@ -3569,6 +3603,11 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) { return "MIPS_ABIFLAGS"; } break; + case EM_RISCV: + switch (Type) { + case SHT_RISCV_ATTRIBUTES: + return "RISCV_ATTRIBUTES"; + } } switch (Type) { case SHT_NULL: @@ -3886,9 +3925,7 @@ template <class ELFT> void GNUStyle<ELFT>::printHashSymbols(const ELFO *Obj) { auto StringTable = this->dumper()->getDynamicStringTable(); auto DynSyms = this->dumper()->dynamic_symbols(); - // Try printing .hash - if (auto SysVHash = this->dumper()->getHashTable()) { - OS << "\n Symbol table of .hash for image:\n"; + auto PrintHashTable = [&](const Elf_Hash *SysVHash) { if (ELFT::Is64Bits) OS << " Num Buc: Value Size Type Bind Vis Ndx Name"; else @@ -3917,6 +3954,12 @@ template <class ELFT> void GNUStyle<ELFT>::printHashSymbols(const ELFO *Obj) { Visited[Ch] = true; } } + }; + + if (const Elf_Hash *SysVHash = this->dumper()->getHashTable()) { + OS << "\n Symbol table of .hash for image:\n"; + if (checkHashTable(Obj, SysVHash, this->FileName)) + PrintHashTable(SysVHash); } // Try printing .gnu.hash @@ -4439,6 +4482,9 @@ template <class ELFT> void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) { // Print histogram for .hash section if (const Elf_Hash *HashTable = this->dumper()->getHashTable()) { + if (!checkHashTable(Obj, HashTable, this->FileName)) + return; + size_t NBucket = HashTable->nbucket; size_t NChain = HashTable->nchain; ArrayRef<Elf_Word> Buckets = HashTable->buckets(); diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index d4de44313a29..8d46bd2cb627 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -195,30 +195,30 @@ static bool parseCommand(StringRef InputString, Command &Cmd, // If no cmd, assume it's CODE. Cmd = Command::Code; } - const char *pos = InputString.data(); + const char *Pos = InputString.data(); // Skip delimiters and parse input filename (if needed).
if (ClBinaryName.empty()) { - pos += strspn(pos, kDelimiters); - if (*pos == '"' || *pos == '\'') { - char quote = *pos; - pos++; - const char *end = strchr(pos, quote); - if (!end) + Pos += strspn(Pos, kDelimiters); + if (*Pos == '"' || *Pos == '\'') { + char Quote = *Pos; + Pos++; + const char *End = strchr(Pos, Quote); + if (!End) return false; - ModuleName = std::string(pos, end - pos); - pos = end + 1; + ModuleName = std::string(Pos, End - Pos); + Pos = End + 1; } else { - int name_length = strcspn(pos, kDelimiters); - ModuleName = std::string(pos, name_length); - pos += name_length; + int NameLength = strcspn(Pos, kDelimiters); + ModuleName = std::string(Pos, NameLength); + Pos += NameLength; } } else { ModuleName = ClBinaryName; } // Skip delimiters and parse module offset. - pos += strspn(pos, kDelimiters); - int offset_length = strcspn(pos, kDelimiters); - return !StringRef(pos, offset_length).getAsInteger(0, ModuleOffset); + Pos += strspn(Pos, kDelimiters); + int OffsetLength = strcspn(Pos, kDelimiters); + return !StringRef(Pos, OffsetLength).getAsInteger(0, ModuleOffset); } static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer, diff --git a/llvm/unittests/Analysis/VectorUtilsTest.cpp b/llvm/unittests/Analysis/VectorUtilsTest.cpp index d471e79842ca..1a06b0994bc0 100644 --- a/llvm/unittests/Analysis/VectorUtilsTest.cpp +++ b/llvm/unittests/Analysis/VectorUtilsTest.cpp @@ -100,10 +100,10 @@ TEST_F(BasicTest, isSplat) { TEST_F(BasicTest, scaleShuffleMask) { SmallVector ScaledMask; - scaleShuffleMask(1, {3,2,0,-2}, ScaledMask); - EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({3,2,0,-2})); - scaleShuffleMask(4, {3,2,0,-1}, ScaledMask); - EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1})); + scaleShuffleMask(1, {3,2,0,-2}, ScaledMask); + EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({3,2,0,-2})); + scaleShuffleMask(4, {3,2,0,-1}, ScaledMask); + EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1})); } TEST_F(BasicTest, getSplatIndex) { diff --git a/llvm/unittests/Support/ELFAttributeParserTest.cpp b/llvm/unittests/Support/ELFAttributeParserTest.cpp index ad4e309d8953..8234d4ee176f 100644 --- a/llvm/unittests/Support/ELFAttributeParserTest.cpp +++ b/llvm/unittests/Support/ELFAttributeParserTest.cpp @@ -40,9 +40,9 @@ TEST(AttributeHeaderParser, UnrecognizedFormatVersion) { testParseError(bytes, "unrecognized format-version: 0x1"); } -TEST(AttributeHeaderParser, InvalidSubsectionLength) { +TEST(AttributeHeaderParser, InvalidSectionLength) { static const uint8_t bytes[] = {'A', 3, 0, 0, 0}; - testParseError(bytes, "invalid subsection length 3 at offset 0x1"); + testParseError(bytes, "invalid section length 3 at offset 0x1"); } TEST(AttributeHeaderParser, UnrecognizedVendorName) { diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py index 402fadb12d6c..53ef761302ae 100644 --- a/llvm/utils/lit/lit/cl_arguments.py +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -5,18 +5,17 @@ import lit.util + def parse_args(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='lit') parser.add_argument('test_paths', nargs='+', metavar="TEST_PATH", help='File or path to include in the test suite') - parser.add_argument("--version", - dest="show_version", - help="Show version and exit", - version="lit " + lit.__version__, - action="version") + parser.add_argument('--version', + action='version', + version='%(prog)s ' + lit.__version__) 
parser.add_argument("-j", "--threads", "--workers", dest="workers", @@ -189,12 +188,15 @@ def parse_args(): return opts + def _positive_int(arg): return _int(arg, 'positive', lambda i: i > 0) + def _non_negative_int(arg): return _int(arg, 'non-negative', lambda i: i >= 0) + def _int(arg, kind, pred): desc = "requires {} integer, but found '{}'" try: @@ -205,6 +207,7 @@ def _int(arg, kind, pred): raise _error(desc, kind, arg) return i + def _case_insensitive_regex(arg): import re try: @@ -212,6 +215,7 @@ def _case_insensitive_regex(arg): except re.error as reason: raise _error("invalid regular expression: '{}', {}", arg, reason) + def _error(desc, *args): msg = desc.format(*args) return argparse.ArgumentTypeError(msg) diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index 4b61c8f37761..8c675c0e5ba4 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -20,7 +20,6 @@ def main(builtin_params={}): opts = lit.cl_arguments.parse_args() - params = create_params(builtin_params, opts.user_params) is_windows = platform.system() == 'Windows' diff --git a/llvm/utils/lit/tests/usage.py b/llvm/utils/lit/tests/usage.py index d168c5eff9e3..77b3573c5375 100644 --- a/llvm/utils/lit/tests/usage.py +++ b/llvm/utils/lit/tests/usage.py @@ -1,6 +1,7 @@ -# Basic sanity check that usage works. +# Basic sanity check for `--help` and `--version` options. # -# RUN: %{lit} --help > %t.out -# RUN: FileCheck < %t.out %s +# RUN: %{lit} --help | FileCheck %s --check-prefix=HELP +# RUN: %{lit} --version 2>&1 | FileCheck %s --check-prefix=VERSION # -# CHECK: usage: lit.py [-h] +# HELP: usage: lit [-h] +# VERSION: lit {{[0-9]+\.[0-9]+\.[0-9]+[a-zA-Z0-9]*}} diff --git a/llvm/utils/lit/tests/version.py b/llvm/utils/lit/tests/version.py deleted file mode 100644 index 1d5e152ddd3b..000000000000 --- a/llvm/utils/lit/tests/version.py +++ /dev/null @@ -1,5 +0,0 @@ -# Basic sanity check that --version works. -# -# RUN: %{lit} --version 2>&1 | FileCheck %s -# -# CHECK: lit {{[0-9]+\.[0-9]+\.[0-9]+[a-zA-Z0-9]*}} diff --git a/llvm/utils/llvm-locstats/llvm-locstats.py b/llvm/utils/llvm-locstats/llvm-locstats.py index 7b2f706fca94..dec87f9caf7d 100755 --- a/llvm/utils/llvm-locstats/llvm-locstats.py +++ b/llvm/utils/llvm-locstats/llvm-locstats.py @@ -121,14 +121,16 @@ def draw_location_diff(self, locstats_to_compare): ax = fig.add_subplot(111) init_plot(plt) + comparison_keys = list(coverage_buckets()) ax.bar(buckets, self.variables_coverage_map.values(), align='edge', - tick_label=self.variables_coverage_map.keys(), width=0.4, + width=0.4, label='variables of {}'.format(self.file_name)) ax.bar(buckets_to_compare, locstats_to_compare.variables_coverage_map.values(), color='r', align='edge', width=-0.4, - tick_label=locstats_to_compare.variables_coverage_map.keys(), label='variables of {}'.format(locstats_to_compare.file_name)) + ax.set_xticks(range(len(comparison_keys))) + ax.set_xticklabels(comparison_keys) props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) plt.text(0.02, 0.88, diff --git a/mlir/docs/Dialects/Linalg.md b/mlir/docs/Dialects/Linalg.md index af4db423e44e..878ce8f11523 100644 --- a/mlir/docs/Dialects/Linalg.md +++ b/mlir/docs/Dialects/Linalg.md @@ -29,7 +29,7 @@ performed on the Linalg IR and that have influenced its design: 1. Tiled Producer-Consumer Fusion with Parametric Tile-And-Fuse. 1. Map to Parallel and Reduction Loops and Hardware. 1. Vectorization: Rewrite in Vector Form. -1. Lower to Loops (Affine and/or Generic). +1. 
Lower to Loops (Affine, Generic and Parallel). 1. Lower to Library Calls or Special Instructions, Intrinsics or ISA. 1. Partially Lower to Iterations Over a Finer-Grained Linalg Op. @@ -241,7 +241,7 @@ example: (i, j) -> (i, j), (i, j) -> (i, j) } -#attrs = {args_in: 1, args_out: 1, indexings: #indexing_maps} +#attrs = {args_in: 2, args_out: 1, indexings: #indexing_maps} func @example(%A: memref, %B: memref, %C: memref) { linalg.generic #attrs (%A, %B, %C) { ^bb0(%a: f32, %b: f32): @@ -295,7 +295,7 @@ example: (i, j) -> (i, j), (i, j) -> (i, j) } -#attrs = {args_in: 1, args_out: 1, indexings: #indexing_maps, fun: #fun_attr} +#attrs = {args_in: 2, args_out: 1, indexings: #indexing_maps, fun: #fun_attr} func @example(%A: memref, %B: memref, %C: memref) { linalg.generic #attrs (%A, %B, %C) { ^bb0(%a: f32, %b: f32): diff --git a/mlir/docs/Dialects/SPIR-V.md b/mlir/docs/Dialects/SPIR-V.md index 20919c08244b..acf56f137e16 100644 --- a/mlir/docs/Dialects/SPIR-V.md +++ b/mlir/docs/Dialects/SPIR-V.md @@ -15,6 +15,8 @@ Vulkan and OpenCL. It is fully defined in a [human-readable specification][SpirvSpec]; the syntax of various SPIR-V instructions are encoded in a [machine-readable grammar][SpirvGrammar]. +[TOC] + ## Design Guidelines SPIR-V is a binary intermediate language that serves dual purpose: on one side, @@ -459,8 +461,9 @@ can be represented in the dialect as ``` Operation documentation is written in each op's Op Definition Spec using -TableGen. A markdown version of the doc can be found at -[mlir.llvm.org][LlvmMlirSpirvDoc] or generated using `mlir-tblgen -gen-doc`. +TableGen. A markdown version of the doc can be generated using +`mlir-tblgen -gen-doc` and is attached in the +[Operation definitions](#operation-definitions) section. ### Ops from extended instruction sets @@ -1224,6 +1227,10 @@ conversion][MlirDialectConversionSignatureConversion] might be needed as well. operations contained within its region are valid operations in the SPIR-V dialect. +## Operation definitions + +[include "Dialects/SPIRVOps.md"] + [Spirv]: https://www.khronos.org/registry/spir-v/ [SpirvSpec]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html [SpirvLogicalLayout]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_logicallayout_a_logical_layout_of_a_module @@ -1270,7 +1277,6 @@ dialect. [GitHubDialectTracking]: https://github.com/tensorflow/mlir/issues/302 [GitHubLoweringTracking]: https://github.com/tensorflow/mlir/issues/303 [GenSpirvUtilsPy]: https://github.com/llvm/llvm-project/blob/master/mlir/utils/spirv/gen_spirv_dialect.py -[LlvmMlirSpirvDoc]: ../Dialects/SPIRVOps/ [CustomTypeAttrTutorial]: ../DefiningAttributesAndTypes/ [VulkanSpirv]: https://renderdoc.org/vkspec_chunked/chap40.html#spirvenv [VulkanShaderInterface]: https://renderdoc.org/vkspec_chunked/chap14.html#interfaces-resources diff --git a/mlir/docs/Passes.md b/mlir/docs/Passes.md index 231594b82fc7..866f3666d7d0 100644 --- a/mlir/docs/Passes.md +++ b/mlir/docs/Passes.md @@ -4,295 +4,46 @@ This document describes the available MLIR passes and their contracts. [TOC] -## Affine dialect lowering (`-lower-affine`) +## General Transformation Passes -Convert operations from the affine dialect into operations from the loop and -standard dialects. +[include "GeneralPasses.md"] -`affine.for` operations are converted to `loop.for` operations that are free of -certain structural restrictions (on their bounds and step). `affine.if` is -similarly converted to the `loop.if` operation. 
`affine.apply` operations are -converted into sequences of primitive arithmetic operations from the standard -dialect that have the same effect, using operands of the `index` type. -Consequently, named maps and sets thare are no longer in use may be removed from -the module. +## Conversion Passes -For example, `%r = affine.apply affine_map<(d0, d1)[s0] -> (d0 + 2*d1 + -s0)>(%d0, %d1)[%s0]` -can be converted into: +[include "ConversionPasses.md"] -```mlir -%d0 = <...> -%d1 = <...> -%s0 = <...> -%0 = constant 2 : index -%1 = muli %0, %d1 -%2 = addi %d0, %1 -%r = addi %2, %s0 -``` +## Quantizer Passes -### Input invariant +[include "QuantizerPasses.md"] -- no `Tensor` types; +## `affine` Dialect Passes -These restrictions may be lifted in the future. +[include "AffinePasses.md"] -### Output IR +## `fxpmath` Dialect Passes -Functions with `affine.for` and `affine.if` operations eliminated. These -functions may contain operations from the Standard dialect in addition to those -already present before the pass. +[include "FxpMathPasses.md"] -### Invariants +## `gpu` Dialect Passes -- Functions without a body are not modified. -- The semantics of the other functions is preserved. -- Individual operations other than those mentioned above are not modified if - they do not depend on the loop iterator value or on the result of - `affine.apply`. +[include "GPUPasses.md"] -## Conversion from Standard to LLVM IR dialect (`-convert-std-to-llvm`) +## `linalg` Dialect Passes -Convert standard operations into the LLVM IR dialect operations. +[include "LinalgPasses.md"] -### Input invariant +## `llvm` Dialect Passes -- operations including: arithmetic on integers and floats, constants, direct - calls, returns and branches; -- no `tensor` types; -- all `vector` are one-dimensional; -- all blocks are reachable by following the successors of the first basic - block; +[include "LLVMPasses.md"] -If other operations are present and their results are required by the LLVM IR -dialect operations, the pass will fail. Any LLVM IR operations or types already -present in the IR will be kept as is. +## `loop` Dialect Passes -### Output IR +[include "LoopPasses.md"] -Functions converted to LLVM IR. Function arguments types are converted -one-to-one. Function results are converted one-to-one and, in case more than 1 -value is returned, packed into an LLVM IR struct type. Function calls and -returns are updated accordingly. Block argument types are updated to use LLVM IR -types. +## `quant` Dialect Passes -## Data Copy DMA generation (`-affine-data-copy-generate`) +[include "QuantPasses.md"] -Replaces all loads and stores on memref's living in 'slowMemorySpace' by -introducing DMA operations (strided DMA if necessary) to transfer data to/from -`fastMemorySpace` and rewriting the original load's/store's to instead -load/store from the allocated fast memory buffers. Additional options specify -the identifier corresponding to the fast memory space and the amount of fast -memory space available. The pass traverses through the nesting structure, -recursing to inner levels if necessary to determine at what depth DMA transfers -need to be placed so that the allocated buffers fit within the memory capacity -provided. If this is not possible (for example, when the elemental type itself -is of size larger than the DMA capacity), an error with location information is -emitted. The DMA transfers are also hoisted up past all loops with respect to -which the transfers are invariant. 
+## `spv` Dialect Passes -Input - -```mlir -func @loop_nest_tiled() -> memref<256x1024xf32> { - %0 = alloc() : memref<256x1024xf32> - affine.for %i0 = 0 to 256 step 32 { - affine.for %i1 = 0 to 1024 step 32 { - affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) { - affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) { - %1 = affine.load %0[%i2, %i3] : memref<256x1024xf32> - } - } - } - } - return %0 : memref<256x1024xf32> -} -``` - -Output (with flags: -affine-data-copy-generate -affine-data-copy-generate-fast-mem-space=2) - -```mlir -module { - func @loop_nest_tiled() -> memref<256x1024xf32> { - %c262144 = constant 262144 : index - %c0 = constant 0 : index - %0 = alloc() : memref<256x1024xf32> - %1 = alloc() : memref<256x1024xf32, 2> - %2 = alloc() : memref<1xi32> - affine.dma_start %0[%c0, %c0], %1[%c0, %c0], %2[%c0], %c262144 : memref<256x1024xf32>, memref<256x1024xf32, 2>, memref<1xi32> - affine.dma_wait %2[%c0], %c262144 : memref<1xi32> - affine.for %arg0 = 0 to 256 step 32 { - affine.for %arg1 = 0 to 1024 step 32 { - affine.for %arg2 = #map1(%arg0) to #map2(%arg0) { - affine.for %arg3 = #map1(%arg1) to #map2(%arg1) { - %3 = affine.load %1[%arg2, %arg3] : memref<256x1024xf32, 2> - } - } - } - } - dealloc %2 : memref<1xi32> - dealloc %1 : memref<256x1024xf32, 2> - return %0 : memref<256x1024xf32> - } -} -``` - -## Loop tiling (`-affine-loop-tile`) - -Performs tiling or blocking of loop nests. It currently works on perfect loop -nests. - -## Loop unroll (`-affine-loop-unroll`) - -This pass implements loop unrolling. It is able to unroll loops with arbitrary -bounds, and generate a cleanup loop when necessary. - -## Loop unroll and jam (`-affine-loop-unroll-jam`) - -This pass implements unroll and jam for loops. It works on both perfect or -imperfect loop nests. - -## Loop fusion (`-affine-loop-fusion`) - -Performs fusion of loop nests using a slicing-based approach. The fused loop -nests, when possible, are rewritten to access significantly smaller local -buffers instead of the original memref's, and the latter are often -either completely optimized away or contracted. This transformation leads to -enhanced locality and lower memory footprint through the elimination or -contraction of temporaries / intermediate memref's. These benefits are sometimes -achieved at the expense of redundant computation through a cost model that -evaluates available choices such as the depth at which a source slice should be -materialized in the designation slice. - -## Memref bound checking (`-memref-bound-check`) - -Checks all load's and store's on memref's for out of bound accesses, and reports -any out of bound accesses (both overrun and underrun) with location information. - -```mlir -test/Transforms/memref-bound-check.mlir:19:13: error: 'load' op memref out of upper bound access along dimension #2 - %x = load %A[%idx0, %idx1] : memref<9 x 9 x i32> - ^ -test/Transforms/memref-bound-check.mlir:19:13: error: 'load' op memref out of lower bound access along dimension #2 - %x = load %A[%idx0, %idx1] : memref<9 x 9 x i32> - ^ -``` - -## Memref dataflow optimization (`-memref-dataflow-opt`) - -This pass performs store to load forwarding for memref's to eliminate memory -accesses and potentially the entire memref if all its accesses are forwarded. 
- -Input - -```mlir -func @store_load_affine_apply() -> memref<10x10xf32> { - %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10x10xf32> - affine.for %i0 = 0 to 10 { - affine.for %i1 = 0 to 10 { - affine.store %cf7, %m[%i0, %i1] : memref<10x10xf32> - %v0 = affine.load %m[%i0, %i1] : memref<10x10xf32> - %v1 = addf %v0, %v0 : f32 - } - } - return %m : memref<10x10xf32> -} -``` - -Output - -```mlir -module { - func @store_load_affine_apply() -> memref<10x10xf32> { - %cst = constant 7.000000e+00 : f32 - %0 = alloc() : memref<10x10xf32> - affine.for %arg0 = 0 to 10 { - affine.for %arg1 = 0 to 10 { - affine.store %cst, %0[%arg0, %arg1] : memref<10x10xf32> - %1 = addf %cst, %cst : f32 - } - } - return %0 : memref<10x10xf32> - } -} - -``` - -## Memref dependence analysis (`-memref-dependence-check`) - -This pass performs dependence analysis to determine dependences between pairs of -memory operations (load's and store's) on memref's. Dependence analysis exploits -polyhedral information available (affine maps, expressions, and affine.apply -operations) to precisely represent dependences using affine constraints, while -also computing dependence vectors from them, where each component of the -dependence vector provides a lower and an upper bound on the dependence distance -along the corresponding dimension. - -```mlir -test/Transforms/memref-dataflow-opt.mlir:232:7: note: dependence from 2 to 1 at depth 1 = ([1, 1], [-inf, +inf]) - store %cf9, %m[%idx] : memref<10xf32> -``` - -## Pipeline data transfer (`-affine-pipeline-data-transfer`) - -This pass performs a transformation to overlap non-blocking DMA operations in a -loop with computations through double buffering. This is achieved by advancing -dma_start operations with respect to other operations. - -Input - -```mlir -func @pipelinedatatransfer() { - %0 = alloc() : memref<256xf32> - %1 = alloc() : memref<32xf32, 1> - %2 = alloc() : memref<1xf32> - %c0 = constant 0 : index - %c128 = constant 128 : index - affine.for %i0 = 0 to 8 { - affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32> - affine.dma_wait %2[%c0], %c128 : memref<1xf32> - %3 = affine.load %1[%i0] : memref<32xf32, 1> - %4 = "compute"(%3) : (f32) -> f32 - affine.store %4, %1[%i0] : memref<32xf32, 1> - } - return -} -``` - -Output - -```mlir -module { - func @pipelinedatatransfer() { - %c8 = constant 8 : index - %c0 = constant 0 : index - %0 = alloc() : memref<256xf32> - %c0_0 = constant 0 : index - %c128 = constant 128 : index - %1 = alloc() : memref<2x32xf32, 1> - %2 = alloc() : memref<2x1xf32> - affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> - affine.for %arg0 = 1 to 8 { - affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> - %8 = affine.apply #map3(%arg0) - %9 = affine.apply #map4(%8) - %10 = affine.apply #map4(%8) - affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32> - %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1> - %12 = "compute"(%11) : (f32) -> f32 - affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1> - } - %3 = affine.apply #map3(%c8) - %4 = affine.apply #map4(%3) - %5 = affine.apply #map4(%3) - affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32> - %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1> - %7 = "compute"(%6) : (f32) -> f32 - affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1> - 
dealloc %2 : memref<2x1xf32> - dealloc %1 : memref<2x32xf32, 1> - return - } -} -``` +[include "SPIRVPasses.md"] diff --git a/mlir/include/mlir/Analysis/LoopAnalysis.h b/mlir/include/mlir/Analysis/LoopAnalysis.h index 5141df736a9c..7ed19ef99f87 100644 --- a/mlir/include/mlir/Analysis/LoopAnalysis.h +++ b/mlir/include/mlir/Analysis/LoopAnalysis.h @@ -82,7 +82,7 @@ bool isVectorizableLoopBody(AffineForOp loop, int *memRefDim, /// 'def' and all its uses have the same shift factor. // TODO(mlir-team): extend this to check for memory-based dependence // violation when we have the support. -bool isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts); +bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts); } // end namespace mlir #endif // MLIR_ANALYSIS_LOOP_ANALYSIS_H diff --git a/mlir/include/mlir/CMakeLists.txt b/mlir/include/mlir/CMakeLists.txt index 4754391dc39f..594dc6180f50 100644 --- a/mlir/include/mlir/CMakeLists.txt +++ b/mlir/include/mlir/CMakeLists.txt @@ -1,3 +1,6 @@ +add_subdirectory(Conversion) add_subdirectory(Dialect) add_subdirectory(IR) add_subdirectory(Interfaces) +add_subdirectory(Quantizer) +add_subdirectory(Transforms) diff --git a/mlir/include/mlir/Conversion/CMakeLists.txt b/mlir/include/mlir/Conversion/CMakeLists.txt new file mode 100644 index 000000000000..d4ce2634f450 --- /dev/null +++ b/mlir/include/mlir/Conversion/CMakeLists.txt @@ -0,0 +1,6 @@ + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRConversionPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc ConversionPasses ./) diff --git a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h index d5f48d29ea6c..049e8538d746 100644 --- a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h +++ b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h @@ -27,6 +27,7 @@ class Pass; /// calling the conversion. std::unique_ptr> createSimpleLoopsToGPUPass(unsigned numBlockDims, unsigned numThreadDims); +std::unique_ptr> createSimpleLoopsToGPUPass(); /// Create a pass that converts every loop operation within the body of the /// FuncOp into a GPU launch. The number of workgroups and workgroup size for @@ -37,6 +38,7 @@ createSimpleLoopsToGPUPass(unsigned numBlockDims, unsigned numThreadDims); std::unique_ptr> createLoopToGPUPass(ArrayRef<int64_t> numWorkGroups, ArrayRef<int64_t> workGroupSize); +std::unique_ptr> createLoopToGPUPass(); /// Creates a pass that converts loop.parallel operations into a gpu.launch /// operation. The mapping of loop dimensions to launch dimensions is derived diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td new file mode 100644 index 000000000000..5553655fafae --- /dev/null +++ b/mlir/include/mlir/Conversion/Passes.td @@ -0,0 +1,267 @@ +//===-- Passes.td - Conversion pass definition file --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_PASSES +#define MLIR_CONVERSION_PASSES + +include "mlir/Pass/PassBase.td" + +//===----------------------------------------------------------------------===// +// AffineToStandard +//===----------------------------------------------------------------------===// + +def ConvertAffineToStandard : Pass<"lower-affine"> { + let summary = "Lower Affine operations to a combination of Standard and Loop " + "operations"; + let description = [{ + + Convert operations from the affine dialect into operations from the loop and + standard dialects. + + `affine.for` operations are converted to `loop.for` operations that are free + of certain structural restrictions (on their bounds and step). `affine.if` + is similarly converted to the `loop.if` operation. `affine.apply` operations + are converted into sequences of primitive arithmetic operations from the + standard dialect that have the same effect, using operands of the `index` + type. Consequently, named maps and sets that are no longer in use may be + removed from the module. + + For example, `%r = affine.apply affine_map<(d0, d1)[s0] -> (d0 + 2*d1 + + s0)>(%d0, %d1)[%s0]` + can be converted into: + + ```mlir + %d0 = <...> + %d1 = <...> + %s0 = <...> + %0 = constant 2 : index + %1 = muli %0, %d1 + %2 = addi %d0, %1 + %r = addi %2, %s0 + ``` + + #### Input invariant + + - no `Tensor` types; + + These restrictions may be lifted in the future. + + #### Output IR + + Functions with `affine.for` and `affine.if` operations eliminated. These + functions may contain operations from the Standard dialect in addition to + those already present before the pass. + + #### Invariants + + - Functions without a body are not modified. + - The semantics of the other functions is preserved. + - Individual operations other than those mentioned above are not modified + if they do not depend on the loop iterator value or on the result of + `affine.apply`.
+ }]; + let constructor = "mlir::createLowerAffinePass()"; +} + +//===----------------------------------------------------------------------===// +// AVX512ToLLVM +//===----------------------------------------------------------------------===// + +def ConvertAVX512ToLLVM : Pass<"convert-avx512-to-llvm"> { + let summary = "Convert the operations from the avx512 dialect into the LLVM " + "dialect"; + let constructor = "mlir::createConvertAVX512ToLLVMPass()"; +} + +//===----------------------------------------------------------------------===// +// GPUToCUDA +//===----------------------------------------------------------------------===// + +def ConvertGpuLaunchFuncToCudaCalls : Pass<"launch-func-to-cuda"> { + let summary = "Convert all launch_func ops to CUDA runtime calls"; + let constructor = "mlir::createConvertGpuLaunchFuncToCudaCallsPass()"; +} + +//===----------------------------------------------------------------------===// +// GPUToNVVM +//===----------------------------------------------------------------------===// + +def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm"> { + let summary = "Generate NVVM operations for gpu operations"; + let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()"; +} + +//===----------------------------------------------------------------------===// +// GPUToROCDL +//===----------------------------------------------------------------------===// + +def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl"> { + let summary = "Generate ROCDL operations for gpu operations"; + let constructor = "mlir::createLowerGpuOpsToROCDLOpsPass()"; +} + +//===----------------------------------------------------------------------===// +// GPUToSPIRV +//===----------------------------------------------------------------------===// + +def ConvertGPUToSPIRV : Pass<"convert-gpu-to-spirv"> { + let summary = "Convert GPU dialect to SPIR-V dialect"; + let constructor = "mlir::createConvertGPUToSPIRVPass()"; +} + +//===----------------------------------------------------------------------===// +// GPUToVulkan +//===----------------------------------------------------------------------===// + +def ConvertGpuLaunchFuncToVulkanLaunchFunc + : Pass<"convert-gpu-launch-to-vulkan-launch"> { + let summary = "Convert gpu.launch_func to vulkanLaunch external call"; + let constructor = "mlir::createConvertGpuLaunchFuncToVulkanLaunchFuncPass()"; +} + +def ConvertVulkanLaunchFuncToVulkanCalls : Pass<"launch-func-to-vulkan"> { + let summary = "Convert vulkanLaunch external call to Vulkan runtime external " + "calls"; + let constructor = "mlir::createConvertVulkanLaunchFuncToVulkanCallsPass()"; +} + +//===----------------------------------------------------------------------===// +// LinalgToLLVM +//===----------------------------------------------------------------------===// + +def ConvertLinalgToLLVM : Pass<"convert-linalg-to-llvm"> { + let summary = "Convert the operations from the linalg dialect into the LLVM " + "dialect"; + let constructor = "mlir::createConvertLinalgToLLVMPass()"; +} + +//===----------------------------------------------------------------------===// +// LinalgToSPIRV +//===----------------------------------------------------------------------===// + +def ConvertLinalgToSPIRV : Pass<"convert-linalg-to-spirv"> { + let summary = "Convert Linalg ops to SPIR-V ops"; + let constructor = "mlir::createLinalgToSPIRVPass()"; +} + +//===----------------------------------------------------------------------===// +// LoopToStandard 
+//===----------------------------------------------------------------------===// + +def ConvertLoopToStandard : Pass<"convert-loop-to-std"> { + let summary = "Convert Loop dialect to Standard dialect, replacing structured" + " control flow with a CFG"; + let constructor = "mlir::createLowerToCFGPass()"; +} + +//===----------------------------------------------------------------------===// +// LoopsToGPU +//===----------------------------------------------------------------------===// + +def ConvertSimpleLoopsToGPU : Pass<"convert-loops-to-gpu"> { + let summary = "Convert top-level loops to GPU kernels"; + let constructor = "mlir::createSimpleLoopsToGPUPass()"; + let options = [ + Option<"numBlockDims", "gpu-block-dims", "unsigned", /*default=*/"1u", + "Number of GPU block dimensions for mapping">, + Option<"numThreadDims", "gpu-thread-dims", "unsigned", /*default=*/"1u", + "Number of GPU thread dimensions for mapping"> + ]; +} + +def ConvertLoopsToGPU : Pass<"convert-loop-op-to-gpu"> { + let summary = "Convert top-level loop::ForOp to GPU kernels"; + let constructor = "mlir::createLoopToGPUPass()"; + let options = [ + ListOption<"numWorkGroups", "gpu-num-workgroups", "int64_t", + "Num workgroups in the GPU launch", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">, + ListOption<"workGroupSize", "gpu-workgroup-size", "int64_t", + "Workgroup Size in the GPU launch", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> + ]; +} + +def ConvertParallelLoopToGpu : Pass<"convert-parallel-loops-to-gpu"> { + let summary = "Convert mapped loop.parallel ops to gpu launch operations"; + let constructor = "mlir::createParallelLoopToGpuPass()"; +} + +//===----------------------------------------------------------------------===// +// StandardToLLVM +//===----------------------------------------------------------------------===// + +def ConvertStandardToLLVM : Pass<"convert-std-to-llvm"> { + let summary = "Convert scalar and vector operations from the Standard to the " + "LLVM dialect"; + let description = [{ + Convert standard operations into the LLVM IR dialect operations. + + #### Input invariant + + - operations including: arithmetic on integers and floats, constants, + direct calls, returns and branches; + - no `tensor` types; + - all `vector` are one-dimensional; + - all blocks are reachable by following the successors of the first basic + block; + + If other operations are present and their results are required by the LLVM + IR dialect operations, the pass will fail. Any LLVM IR operations or types + already present in the IR will be kept as is. + + #### Output IR + + Functions converted to LLVM IR. Function arguments types are converted + one-to-one. Function results are converted one-to-one and, in case more than + 1 value is returned, packed into an LLVM IR struct type. Function calls and + returns are updated accordingly. Block argument types are updated to use + LLVM IR types. 
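The `ConvertSimpleLoopsToGPU` options above mirror the `numBlockDims`/`numThreadDims` arguments of the constructor declared in `LoopsToGPUPass.h` earlier in this patch. A short sketch of the two construction styles (the 1-D mapping is an illustrative choice, not a recommendation):

```cpp
#include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h"
#include "mlir/Pass/PassManager.h"

static void addLoopsToGPUMapping(mlir::OpPassManager &pm) {
  // Explicit-argument form: the mapping is pinned in C++.
  pm.addPass(mlir::createSimpleLoopsToGPUPass(/*numBlockDims=*/1,
                                              /*numThreadDims=*/1));
  // Parameterless overload: both values come from the tablegen'd pass
  // options (-gpu-block-dims / -gpu-thread-dims), e.g. in textual pipelines.
  //   pm.addPass(mlir::createSimpleLoopsToGPUPass());
}
```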
+ }]; + let constructor = "mlir::createLowerToLLVMPass()"; + let options = [ + Option<"useAlloca", "use-alloca", "bool", /*default=*/"false", + "Use `alloca` instead of `call @malloc` for converting std.alloc">, + Option<"useBarePtrCallConv", "use-bare-ptr-memref-call-conv", "bool", + /*default=*/"false", + "Replace FuncOp's MemRef arguments with bare pointers to the MemRef " + "element types">, + Option<"emitCWrappers", "emit-c-wrappers", "bool", /*default=*/"false", + "Emit wrappers for C-compatible pointer-to-struct memref " + "descriptors">, + Option<"indexBitwidth", "index-bitwidth", "unsigned", + /*default=*/"kDeriveIndexBitwidthFromDataLayout", + "Bitwidth of the index type, 0 to use size of machine word">, + ]; +} + +//===----------------------------------------------------------------------===// +// StandardToSPIRV +//===----------------------------------------------------------------------===// + +def LegalizeStandardForSPIRV : Pass<"legalize-std-for-spirv"> { + let summary = "Legalize standard ops for SPIR-V lowering"; + let constructor = "mlir::createLegalizeStdOpsForSPIRVLoweringPass()"; +} + +def ConvertStandardToSPIRV : Pass<"convert-std-to-spirv"> { + let summary = "Convert Standard Ops to SPIR-V dialect"; + let constructor = "mlir::createConvertStandardToSPIRVPass()"; +} + +//===----------------------------------------------------------------------===// +// VectorToLLVM +//===----------------------------------------------------------------------===// + +def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm"> { + let summary = "Lower the operations from the vector dialect into the LLVM " + "dialect"; + let constructor = "mlir::createConvertVectorToLLVMPass()"; +} + +#endif // MLIR_CONVERSION_PASSES diff --git a/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h b/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h index 5c45a3c32ca0..e96b7cfe76e5 100644 --- a/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h +++ b/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h @@ -5,23 +5,18 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + #ifndef MLIR_CONVERSION_VECTORTOLLVM_CONVERTVECTORTOLOOPS_H_ #define MLIR_CONVERSION_VECTORTOLLVM_CONVERTVECTORTOLOOPS_H_ -#include "mlir/Transforms/DialectConversion.h" - namespace mlir { class MLIRContext; -class ModuleOp; -template <typename T> class OpPassBase; +class OwningRewritePatternList; /// Collect a set of patterns to convert from the Vector dialect to loops + std. void populateVectorToAffineLoopsConversionPatterns( MLIRContext *context, OwningRewritePatternList &patterns); -/// Create a pass to convert vector operations to affine loops + std dialect.
-OpPassBase<ModuleOp> *createLowerVectorToLoopsPass(); - } // namespace mlir #endif // MLIR_CONVERSION_VECTORTOLLVM_CONVERTVECTORTOLOOPS_H_ diff --git a/mlir/include/mlir/Dialect/Affine/CMakeLists.txt b/mlir/include/mlir/Dialect/Affine/CMakeLists.txt index f33061b2d87c..404c926f60ed 100644 --- a/mlir/include/mlir/Dialect/Affine/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Affine/CMakeLists.txt @@ -1 +1,7 @@ add_subdirectory(IR) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRAffinePassIncGen) + +add_mlir_doc(Passes -gen-pass-doc AffinePasses ./) diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h index 735c6c0360f5..75ff4a33649d 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -42,10 +42,14 @@ std::unique_ptr<OpPassBase<FuncOp>> createAffineDataCopyGenerationPass( unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024, uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max()); +/// Overload relying on pass options for initialization. +std::unique_ptr<OpPassBase<FuncOp>> createAffineDataCopyGenerationPass(); /// Creates a pass to perform tiling on loop nests. std::unique_ptr<OpPassBase<FuncOp>> createLoopTilingPass(uint64_t cacheSizeBytes); +/// Overload relying on pass options for initialization. +std::unique_ptr<OpPassBase<FuncOp>> createLoopTilingPass(); /// Creates a loop unrolling pass with the provided parameters. /// 'getUnrollFactor' is a function callback for clients to supply a function @@ -67,6 +71,8 @@ createLoopUnrollAndJamPass(int unrollJamFactor = -1); /// target-independent, n-D super-vector abstraction. std::unique_ptr<OpPassBase<FuncOp>> createSuperVectorizePass(ArrayRef<int64_t> virtualVectorSize); +/// Overload relying on pass options for initialization. +std::unique_ptr<OpPassBase<FuncOp>> createSuperVectorizePass(); } // end namespace mlir diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td new file mode 100644 index 000000000000..4ae53571d1f4 --- /dev/null +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -0,0 +1,70 @@ +//===-- Passes.td - Affine pass definition file ------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions for passes within the Affine/ directory.
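Each explicit-argument constructor above now has a parameterless sibling whose configuration comes from the tablegen'd pass options instead. A usage sketch for the data-copy generation pass (the memory-space numbers are target-specific assumptions, not values taken from this patch):

```cpp
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Pass/PassManager.h"

static void addAffineDataCopyGeneration(mlir::OpPassManager &pm) {
  // Explicit form: stage copies from (slow) memory space 0 into (fast)
  // space 1, with DMA transfers of at least 1 KiB; capacity keeps its default.
  pm.addPass(mlir::createAffineDataCopyGenerationPass(
      /*slowMemorySpace=*/0, /*fastMemorySpace=*/1,
      /*tagMemorySpace=*/0, /*minDmaTransferSize=*/1024));
  // Option-driven form, for configuration from the command line:
  //   pm.addPass(mlir::createAffineDataCopyGenerationPass());
}
```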
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_AFFINE_PASSES +#define MLIR_DIALECT_AFFINE_PASSES + +include "mlir/Pass/PassBase.td" + +def AffineDataCopyGeneration : Pass<"affine-data-copy-generate"> { + let summary = "Generate explicit copying for affine memory operations"; + let constructor = "mlir::createAffineDataCopyGenerationPass()"; +} + +def AffineLoopInvariantCodeMotion : Pass<"affine-loop-invariant-code-motion"> { + let summary = "Hoist loop invariant instructions outside of affine loops"; + let constructor = "mlir::createAffineLoopInvariantCodeMotionPass()"; +} + +def AffineLoopTiling : Pass<"affine-loop-tile"> { + let summary = "Tile affine loop nests"; + let constructor = "mlir::createLoopTilingPass()"; +} + +def AffineLoopUnroll : Pass<"affine-loop-unroll"> { + let summary = "Unroll affine loops"; + let constructor = "mlir::createLoopUnrollPass()"; +} + +def AffineLoopUnrollAndJam : Pass<"affine-loop-unroll-jam"> { + let summary = "Unroll and jam affine loops"; + let constructor = "mlir::createLoopUnrollAndJamPass()"; +} + +def AffineVectorize : Pass<"affine-super-vectorize"> { + let summary = "Vectorize to a target independent n-D vector abstraction"; + let constructor = "mlir::createSuperVectorizePass()"; + let options = [ + ListOption<"vectorSizes", "virtual-vector-size", "int64_t", + "Specify an n-D virtual vector size for vectorization", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">, + // Optionally, the fixed mapping from loop to fastest varying MemRef + // dimension for all the MemRefs within a loop pattern: + // the index represents the loop depth, the value represents the k^th + // fastest varying memory dimension. + // This is voluntarily restrictive and is meant to precisely target a + // particular loop/op pair, for testing purposes. + ListOption<"fastestVaryingPattern", "test-fastest-varying", "int64_t", + "Specify a 1-D, 2-D or 3-D pattern of fastest varying memory " + "dimensions to match. See defaultPatterns in Vectorize.cpp for " + "a description and examples. This is used for testing purposes", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> + ]; +} + +def SimplifyAffineStructures : Pass<"simplify-affine-structures"> { + let summary = "Simplify affine expressions in maps/sets and normalize " + "memrefs"; + let constructor = "mlir::createSimplifyAffineStructuresPass()"; +} + +#endif // MLIR_DIALECT_AFFINE_PASSES diff --git a/mlir/include/mlir/Dialect/FxpMathOps/CMakeLists.txt b/mlir/include/mlir/Dialect/FxpMathOps/CMakeLists.txt index 2a493d6c1b20..b2d6dc660dbb 100644 --- a/mlir/include/mlir/Dialect/FxpMathOps/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/FxpMathOps/CMakeLists.txt @@ -1,2 +1,8 @@ add_mlir_dialect(FxpMathOps fxpmath) add_mlir_doc(FxpMathOps -gen-dialect-doc FxpMathDialect Dialects/) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRFxpMathPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc FxpMathPasses ./) diff --git a/mlir/include/mlir/Dialect/FxpMathOps/Passes.h b/mlir/include/mlir/Dialect/FxpMathOps/Passes.h index 1039bcc9cb34..bec2e74f095f 100644 --- a/mlir/include/mlir/Dialect/FxpMathOps/Passes.h +++ b/mlir/include/mlir/Dialect/FxpMathOps/Passes.h @@ -13,6 +13,8 @@ #ifndef MLIR_DIALECT_FXPMATHOPS_PASSES_H #define MLIR_DIALECT_FXPMATHOPS_PASSES_H +#include <memory> + namespace mlir { class FuncOp; template <typename T> class OpPassBase; @@ -23,11 +25,11 @@ namespace fxpmath { /// arithmetic.
This will leave unrecognized real math ops as-is and is /// typically followed by a pass that lowers any unrecognized ops to a pure /// floating point form. -OpPassBase<FuncOp> *createLowerUniformRealMathPass(); +std::unique_ptr<OpPassBase<FuncOp>> createLowerUniformRealMathPass(); /// Creates a pass that lowers uniform-quantized qcast/dcast ops to equivalent /// operations that perform quantize/dequantize. -OpPassBase<FuncOp> *createLowerUniformCastsPass(); +std::unique_ptr<OpPassBase<FuncOp>> createLowerUniformCastsPass(); } // namespace fxpmath } // namespace mlir diff --git a/mlir/include/mlir/Dialect/FxpMathOps/Passes.td b/mlir/include/mlir/Dialect/FxpMathOps/Passes.td new file mode 100644 index 000000000000..254d200029f5 --- /dev/null +++ b/mlir/include/mlir/Dialect/FxpMathOps/Passes.td @@ -0,0 +1,24 @@ +//===-- Passes.td - FxpMath pass definition file -----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_FXPMATH_PASSES +#define MLIR_DIALECT_FXPMATH_PASSES + +include "mlir/Pass/PassBase.td" + +def FxpMathLowerUniformCasts : Pass<"fxpmath-lower-uniform-casts"> { + let summary = "Lowers uniform-quantized casts"; + let constructor = "mlir::fxpmath::createLowerUniformCastsPass()"; +} + +def FxpMathLowerUniformRealMath : Pass<"fxpmath-lower-uniform-real-math"> { + let summary = "Lowers uniform-quantized real math ops to integer arithmetic"; + let constructor = "mlir::fxpmath::createLowerUniformRealMathPass()"; +} + +#endif // MLIR_DIALECT_FXPMATH_PASSES diff --git a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt index 8151c82f43d1..6c80b4c8e3b9 100644 --- a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt @@ -10,3 +10,9 @@ set(LLVM_TARGET_DEFINITIONS ParallelLoopMapperAttr.td) mlir_tablegen(ParallelLoopMapperEnums.h.inc -gen-enum-decls) mlir_tablegen(ParallelLoopMapperEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRParallelLoopMapperEnumsGen) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRGPUPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc GPUPasses ./) diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td new file mode 100644 index 000000000000..563624308297 --- /dev/null +++ b/mlir/include/mlir/Dialect/GPU/Passes.td @@ -0,0 +1,19 @@ +//===-- Passes.td - GPU pass definition file ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_GPU_PASSES +#define MLIR_DIALECT_GPU_PASSES + +include "mlir/Pass/PassBase.td" + +def GpuKernelOutlining : Pass<"gpu-kernel-outlining"> { + let summary = "Outline gpu.launch bodies to kernel functions"; + let constructor = "mlir::createGpuKernelOutliningPass()"; +} + +#endif // MLIR_DIALECT_GPU_PASSES diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt index d7e581b1b949..cc4fd1bafc72 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(Transforms) + set(LLVM_TARGET_DEFINITIONS LLVMOps.td) mlir_tablegen(LLVMOps.h.inc -gen-op-decls) mlir_tablegen(LLVMOps.cpp.inc -gen-op-defs) diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/Transforms/CMakeLists.txt new file mode 100644 index 000000000000..a2fd81c23e11 --- /dev/null +++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRLLVMPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc LLVMPasses ./) diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td new file mode 100644 index 000000000000..0dc193e794f5 --- /dev/null +++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td @@ -0,0 +1,19 @@ +//===-- Passes.td - LLVM pass definition file --------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LLVMIR_TRANSFORMS_PASSES +#define MLIR_DIALECT_LLVMIR_TRANSFORMS_PASSES + +include "mlir/Pass/PassBase.td" + +def LLVMLegalizeForExport : Pass<"llvm-legalize-for-export"> { + let summary = "Legalize LLVM dialect to be convertible to LLVM IR"; + let constructor = "mlir::LLVM::createLegalizeForExportPass()"; +} + +#endif // MLIR_DIALECT_LLVMIR_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h b/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h index 34de176a998e..e40d63661b77 100644 --- a/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h +++ b/mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h @@ -63,6 +63,7 @@ class LinalgDependenceGraph { using dependence_range = iterator_range<dependence_iterator>; enum DependenceType { RAR = 0, RAW, WAR, WAW, NumTypes }; + static StringRef getDependenceTypeStr(DependenceType depType); // Builds a linalg dependence graph for the ops of type LinalgOp under `f`.
static LinalgDependenceGraph buildDependenceGraph(Aliases &aliases, FuncOp f); diff --git a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt index 9f57627c321f..076c2dfbccb5 100644 --- a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt @@ -1,2 +1,8 @@ add_subdirectory(IR) add_subdirectory(Transforms) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRLinalgPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc LinalgPasses ./) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h index 7756a08d5cb2..77d9d9fc2631 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h @@ -29,6 +29,9 @@ namespace mlir { namespace linalg { class ConvOp; +class PoolingMaxOp; +class PoolingMinOp; +class PoolingSumOp; /// Returns the name mangled library call name to disambiguate between different /// overloads at the C level. The name mangling scheme is basic and uses MLIR @@ -60,12 +63,13 @@ std::string generateLibraryCallName(Operation *op); SmallVector<AffineExpr, 4> makeAffineDimExprs(unsigned num, unsigned &startIdx, MLIRContext *context); -/// Builds the indexing expressions for a ConvOp `op`. Returns the vector of -/// AffineMaps representing: -/// `stride[i] * xs[i] + dilation[i] * zs[i] - pad_low[i]` -SmallVector<AffineExpr, 4> weightedConvInputIndex(ConvOp op, - ArrayRef<AffineExpr> xs, - ArrayRef<AffineExpr> zs); +/// Builds the indexing expressions for a ConvOp/PoolingOp `op`. Returns the +/// vector of AffineMaps representing: +/// `stride[i] * outputDims[i] + dilation[i] * windowDims[i] - pad_low[i]` +template <typename PoolingOp> +extern SmallVector<AffineExpr, 4> +weightedPoolingInputIndex(PoolingOp op, ArrayRef<AffineExpr> outputDims, + ArrayRef<AffineExpr> windowDims); /// Returns `maybeMap.get()` if `maybeMap` is set, otherwise returns the /// symbol-less identity map of `rank`. diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index ab53fc30aca8..31b89bc1b2bf 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -251,7 +251,69 @@ def MatmulOp : LinalgStructured_Op<"matmul", [NInputs<2>, NOutputs<1>]> { let hasFolder = 1; } -def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> { +/// A base class for pooling operations such as conv. The arguments must contain +/// optional arguments `strides`, `dilations` and `padding` with the following types: +/// OptionalAttr<I64ArrayAttr>:$strides +/// OptionalAttr<I64ArrayAttr>:$dilations +/// OptionalAttr<I64ElementsAttr>:$padding +/// `strides` denotes the step of each window along the dimension. +class PoolingBase_Op<string mnemonic, list<OpTrait> props> + : LinalgStructured_Op<mnemonic, props> { + let description = [{ + Performs an N-D pooling operation similarly to the description in the TF + documentation: + https://www.tensorflow.org/api_docs/python/tf/nn/pool + + Different from the description, this operation doesn't operate on the batch + and channel dimensions. It only takes tensors of rank `N`. + + ``` + output[x[0], ..., x[N-1]] = + REDUCE_{z[0], ..., z[N-1]} + input[ + x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0], + ... + x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1] + ], + ``` + + The optional arguments are: + - strides: an i64 array specifying the stride (i.e. step) for window + loops.
+ - dilations: an i64 array specifying the filter upsampling/input + downsampling rate + - padding: an i64 array of pairs (low, high) specifying the number of + elements to pad along a dimension. + + If the strides or dilations attributes are missing, the default value is + one for each of the input dimensions. Similarly, padding values are zero + for both low and high in each of the dimensions, if not specified. + }]; + + code commonUtils = libraryCallName # [{ + int64_t getStride(unsigned i) { + assert(i < getNumWindowLoops()); + if (!strides().hasValue()) return 1; + return strides()->getValue()[i] + .cast<IntegerAttr>().getValue().getSExtValue(); + } + + int64_t getDilation(unsigned i) { + assert(i < getNumWindowLoops()); + if (!dilations().hasValue()) return 1; + return dilations()->getValue()[i] + .cast<IntegerAttr>().getValue().getSExtValue(); + } + + int64_t getLowPad(unsigned i) { + assert(i < getNumWindowLoops()); + if (!padding().hasValue()) return 0; + return padding().getValue().getValue({i, 0}); + } + }]; +} + +def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> { let description = [{ Generic n-D convolution as described in the TF documentation: @@ -282,7 +344,7 @@ def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> { OptionalAttr<I64ArrayAttr>:$dilations, OptionalAttr<I64ElementsAttr>:$padding); - let extraClassDeclaration = libraryCallName # [{ + let extraClassDeclaration = commonUtils # [{ // TODO(ntv) extend to support more than 1 dimensions and potentially // grouping too. unsigned getNumBatchDimensions() { return 1; } @@ -309,26 +371,6 @@ def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> { return iters; } - int64_t getStride(unsigned i) { - assert(i < getNumWindowLoops()); - if (!strides().hasValue()) return 1; - return strides()->getValue()[i] - .cast<IntegerAttr>().getValue().getSExtValue(); - } - - int64_t getDilation(unsigned i) { - assert(i < getNumWindowLoops()); - if (!dilations().hasValue()) return 1; - return dilations()->getValue()[i] - .cast<IntegerAttr>().getValue().getSExtValue(); - } - - int64_t getLowPad(unsigned i) { - assert(i < getNumWindowLoops()); - if (!padding().hasValue()) return 0; - return padding().getValue().getValue({i, 0}); - } - // F(z0, ..., zN-1, q, k) * // I(b, x0 + z0 - pad_low_0, ..., xN-1 + zN-1 - pad_low_N-1, q) // -> O(b, x0, ..., xN-1, k) @@ -358,7 +400,7 @@ def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> { // Window reduction dims: sum_{z[0], ..., z[N-1], q} auto zs = makeAffineDimExprs(nWin, idx, context); // Construct the weightedSum expression. - auto ws = weightedConvInputIndex(*this, xs, zs); + auto ws = weightedPoolingInputIndex(*this, xs, zs); return SmallVector<AffineMap, 8>{ // filter[z[0], ..., z[N-1], q, k] AffineMap::get(idx, 0, concat(concat(zs, qs), ks)), @@ -378,6 +420,86 @@ def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> { let hasFolder = 1; } +class SingleInputPoolingBase_Op<string mnemonic> + : PoolingBase_Op<mnemonic, [NInputs<2>, NOutputs<1>]> { + let description = [{ + A base class for single input pooling operations. + + TODO: Figure out a better way to handle window dimensions, i.e., eliminate + the fake memref. + The window dimensions are specified by argument `windowDims`. The i-th + dimension in the shape of `windowDims` denotes the size of the window along + dimension i. For example, if the window size is 2x3, then a memref<2x3> + should be passed to the operation as `windowDims`.
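To make the access rule quoted in the pooling description concrete, here is the same windowed-index computation restated as scalar C++. This is purely illustrative; the op builds the identical expression symbolically via the affine expressions returned by `weightedPoolingInputIndex`:

```cpp
#include <cstdint>

// Input index read for output position `x` and window position `z`:
//   x * stride - pad_before + dilation * z
int64_t pooledInputIndex(int64_t x, int64_t z, int64_t stride,
                         int64_t dilation, int64_t padBefore) {
  return x * stride - padBefore + dilation * z;
}
```

With stride 1, dilation 1, and no padding this degenerates to `x + z`, i.e. a plain sliding window.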
+ }]; + + let arguments = (ins AnyStridedMemRef:$input, + AnyStridedMemRef:$windowDims, + AnyStridedMemRef:$output, + OptionalAttr<I64ArrayAttr>:$strides, + OptionalAttr<I64ArrayAttr>:$dilations, + OptionalAttr<I64ElementsAttr>:$padding); + + let extraClassDeclaration = commonUtils # [{ + llvm::Optional<SmallVector<StringRef, 8>> referenceIterators() { + // Outer parallel loops are always the number of output dimensions. + unsigned nPar = getOutputShapedType(0).getRank(); + // The number of window loops equals the number of output dimensions. + unsigned nWin = nPar; + SmallVector<StringRef, 8> iters(nPar, getParallelIteratorTypeName()); + iters.reserve(nPar + nWin); + iters.append(nWin, getWindowIteratorTypeName()); + return iters; + } + + llvm::Optional<SmallVector<AffineMap, 8>> referenceIndexingMaps() { + MLIRContext *context = getContext(); + auto nPar = getNumParallelLoops(); + auto nWin = getNumWindowLoops(); + assert(nWin > 0 && "expected at least one window dimension"); + unsigned idx = 0; + auto outputDims = makeAffineDimExprs(nPar, idx, context); + auto windowDims = makeAffineDimExprs(nWin, idx, context); + // Construct the weightedSum expression. + auto inputDims = + weightedPoolingInputIndex(*this, outputDims, windowDims); + return SmallVector<AffineMap, 8>{ + // input + AffineMap::get(idx, 0, inputDims), + // windowDims + AffineMap::get(idx, 0, windowDims), + // output + AffineMap::get(idx, 0, outputDims) + }; + } + }]; + + let verifier = [{ return ::verify(*this); }]; + + let hasFolder = 1; +} + +def PoolingMaxOp: SingleInputPoolingBase_Op<"pooling_max"> { + let description = [{ + Takes max op as pooling operation, i.e., it samples the maximum value in the + window. + }]; +} + +def PoolingMinOp: SingleInputPoolingBase_Op<"pooling_min"> { + let description = [{ + Takes min op as pooling operation, i.e., it samples the minimum value in the + window. + }]; +} + +def PoolingSumOp: SingleInputPoolingBase_Op<"pooling_sum"> { + let description = [{ + Takes add op as pooling operation, i.e., it accumulates the values in the + window. + }]; +} + //===----------------------------------------------------------------------===// // Generic Linalg ops. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td index 8fcc1ceea502..46fb9881aba5 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td @@ -100,6 +100,10 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { //===------------------------------------------------------------------===// // Input and Output arguments handling. //===------------------------------------------------------------------===// + InterfaceMethod< + "Return one single buffer at position `$i`.", + "Value", "getBuffer", (ins "unsigned":$i) + >, InterfaceMethod< "Return the number of inputs and outputs, irrespective of their buffer " "or tensor type.", diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h index f546d3670b6a..b13b6d268226 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h @@ -184,6 +184,10 @@ class StructuredOpTraits //==========================================================================// // Input and Output arguments handling.
//==========================================================================// + Value getBuffer(unsigned i) { + assert(i < getNumInputsAndOutputBuffers() && "overflowing buffers index"); + return this->getOperation()->getOperand(i); + } /// Return the number of inputs and outputs, irrespective of their buffer or /// tensor type. unsigned getNumInputsAndOutputs() { return nInputs() + nOutputs(); } diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index 61f88c10470a..9f52e360c7fb 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -20,8 +20,10 @@ namespace mlir { class FuncOp; class ModuleOp; template <typename T> class OpPassBase; +class Pass; std::unique_ptr<OpPassBase<FuncOp>> createLinalgFusionPass(); +std::unique_ptr<Pass> createLinalgFusionOfTensorOpsPass(); std::unique_ptr<OpPassBase<FuncOp>> createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {}); @@ -31,6 +33,7 @@ createLinalgTilingToParallelLoopsPass(ArrayRef<int64_t> tileSizes = {}); std::unique_ptr<OpPassBase<FuncOp>> createLinalgPromotionPass(bool dynamicBuffers); +std::unique_ptr<OpPassBase<FuncOp>> createLinalgPromotionPass(); /// Create a pass to convert Linalg operations to loop.for loops and /// std.load/std.store accesses. diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td new file mode 100644 index 000000000000..210ad1092c66 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -0,0 +1,70 @@ +//===-- Passes.td - Linalg pass definition file ------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_PASSES +#define MLIR_DIALECT_LINALG_PASSES + +include "mlir/Pass/PassBase.td" + +def LinalgFusion : Pass<"linalg-fusion"> { + let summary = "Fuse operations in the linalg dialect"; + let constructor = "mlir::createLinalgFusionPass()"; +} + +def LinalgFusionOfTensorOps : Pass<"linalg-fusion-for-tensor-ops"> { + let summary = "Fuse operations on RankedTensorType in linalg dialect"; + let constructor = "mlir::createLinalgFusionOfTensorOpsPass()"; +} + +def LinalgLowerToAffineLoops : Pass<"convert-linalg-to-affine-loops"> { + let summary = "Lower the operations from the linalg dialect into affine " + "loops"; + let constructor = "mlir::createConvertLinalgToAffineLoopsPass()"; +} + +def LinalgLowerToLoops : Pass<"convert-linalg-to-loops"> { + let summary = "Lower the operations from the linalg dialect into loops"; + let constructor = "mlir::createConvertLinalgToLoopsPass()"; +} + +def LinalgLowerToParallelLoops : Pass<"convert-linalg-to-parallel-loops"> { + let summary = "Lower the operations from the linalg dialect into parallel " + "loops"; + let constructor = "mlir::createConvertLinalgToParallelLoopsPass()"; +} + +def LinalgPromotion : Pass<"linalg-promote-subviews"> { + let summary = "Promote subview ops to local buffers"; + let constructor = "mlir::createLinalgPromotionPass()"; + let options = [ + Option<"dynamicBuffers", "test-promote-dynamic", "bool", + /*default=*/"false", "Test generation of dynamic promoted buffers"> + ]; +} + +def LinalgTiling : Pass<"linalg-tile"> { + let summary = "Tile operations in the linalg dialect"; + let constructor = "mlir::createLinalgTilingPass()"; + let options = [ + ListOption<"tileSizes", "linalg-tile-sizes", "int64_t", + "Tile sizes",
dynamic promoted buffers", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> + ]; +} + +def LinalgTilingToParallelLoops : Pass<"linalg-tile-to-parallel-loops"> { + let summary = "Tile operations in the linalg dialect to parallel loops"; + let constructor = "mlir::createLinalgTilingToParallelLoopsPass()"; + let options = [ + ListOption<"tileSizes", "linalg-tile-sizes", "int64_t", + "Test generation of dynamic promoted buffers", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> + ]; +} + +#endif // MLIR_DIALECT_LINALG_PASSES diff --git a/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt b/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt index 4a838cc1d52d..2627cbb542da 100644 --- a/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt @@ -1,2 +1,8 @@ add_mlir_dialect(LoopOps loop) add_mlir_doc(LoopOps -gen-dialect-doc LoopDialect Dialects/) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRLoopPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc LoopPasses ./) diff --git a/mlir/include/mlir/Dialect/LoopOps/LoopOps.td b/mlir/include/mlir/Dialect/LoopOps/LoopOps.td index 08f61c493e02..e202a4013a19 100644 --- a/mlir/include/mlir/Dialect/LoopOps/LoopOps.td +++ b/mlir/include/mlir/Dialect/LoopOps/LoopOps.td @@ -246,7 +246,9 @@ def IfOp : Loop_Op<"if", } def ParallelOp : Loop_Op<"parallel", - [AttrSizedOperandSegments, SingleBlockImplicitTerminator<"YieldOp">]> { + [AttrSizedOperandSegments, + DeclareOpInterfaceMethods, + SingleBlockImplicitTerminator<"YieldOp">]> { let summary = "parallel for operation"; let description = [{ The "loop.parallel" operation represents a loop nest taking 4 groups of SSA diff --git a/mlir/include/mlir/Dialect/LoopOps/Passes.td b/mlir/include/mlir/Dialect/LoopOps/Passes.td new file mode 100644 index 000000000000..444dcfe22201 --- /dev/null +++ b/mlir/include/mlir/Dialect/LoopOps/Passes.td @@ -0,0 +1,34 @@ +//===-- Passes.td - Loop pass definition file --------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LOOP_PASSES +#define MLIR_DIALECT_LOOP_PASSES + +include "mlir/Pass/PassBase.td" + +def LoopParallelLoopFusion : Pass<"parallel-loop-fusion"> { + let summary = "Fuse adjacent parallel loops"; + let constructor = "mlir::createParallelLoopFusionPass()"; +} + +def LoopParallelLoopSpecialization : Pass<"parallel-loop-specialization"> { + let summary = "Specialize parallel loops for vectorization"; + let constructor = "mlir::createParallelLoopSpecializationPass()"; +} + +def LoopParallelLoopTiling : Pass<"parallel-loop-tiling"> { + let summary = "Tile parallel loops"; + let constructor = "mlir::createParallelLoopTilingPass()"; + let options = [ + ListOption<"tileSizes", "parallel-loop-tile-sizes", "int64_t", + "Factors to tile parallel loops by", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> + ]; +} + +#endif // MLIR_DIALECT_LOOP_PASSES diff --git a/mlir/include/mlir/Dialect/Quant/CMakeLists.txt b/mlir/include/mlir/Dialect/Quant/CMakeLists.txt index b18726736e94..1a48e4928b33 100644 --- a/mlir/include/mlir/Dialect/Quant/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Quant/CMakeLists.txt @@ -1,2 +1,8 @@ add_mlir_dialect(QuantOps quant) add_mlir_doc(QuantOps -gen-dialect-doc QuantDialect Dialects/) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRQuantPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc QuantPasses ./) diff --git a/mlir/include/mlir/Dialect/Quant/Passes.td b/mlir/include/mlir/Dialect/Quant/Passes.td new file mode 100644 index 000000000000..f55a43006977 --- /dev/null +++ b/mlir/include/mlir/Dialect/Quant/Passes.td @@ -0,0 +1,26 @@ +//===-- Passes.td - Quant pass definition file -------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_QUANT_PASSES +#define MLIR_DIALECT_QUANT_PASSES + +include "mlir/Pass/PassBase.td" + +def QuantConvertConst : Pass<"quant-convert-const"> { + let summary = "Converts constants followed by qbarrier to actual quantized " + "values"; + let constructor = "mlir::quant::createConvertConstPass()"; +} + +def QuantConvertSimulatedQuant : Pass<"quant-convert-simulated-quantization"> { + let summary = "Converts training-time simulated quantization ops to " + "corresponding quantize/dequantize casts"; + let constructor = "mlir::quant::createConvertSimulatedQuantPass()"; +} + +#endif // MLIR_DIALECT_QUANT_PASSES diff --git a/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt b/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt index 771d4c1a43bb..15f4a4dfe847 100644 --- a/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt @@ -1,5 +1,5 @@ add_mlir_dialect(SPIRVOps spv) -add_mlir_doc(SPIRVOps -gen-dialect-doc SPIRVDialect Dialects/) +add_mlir_doc(SPIRVOps -gen-op-doc SPIRVOps Dialects/) set(LLVM_TARGET_DEFINITIONS SPIRVBase.td) mlir_tablegen(SPIRVEnums.h.inc -gen-enum-decls) @@ -30,3 +30,9 @@ set(LLVM_TARGET_DEFINITIONS TargetAndABI.td) mlir_tablegen(TargetAndABI.h.inc -gen-struct-attr-decls) mlir_tablegen(TargetAndABI.cpp.inc -gen-struct-attr-defs) add_public_tablegen_target(MLIRSPIRVTargetAndABIIncGen) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRSPIRVPassIncGen) + +add_mlir_doc(Passes -gen-pass-doc SPIRVPasses ./) diff --git a/mlir/include/mlir/Dialect/SPIRV/Passes.td b/mlir/include/mlir/Dialect/SPIRV/Passes.td new file mode 100644 index 000000000000..a03849955797 --- /dev/null +++ b/mlir/include/mlir/Dialect/SPIRV/Passes.td @@ -0,0 +1,30 @@ +//===-- Passes.td - SPIRV pass definition file -------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPIRV_PASSES +#define MLIR_DIALECT_SPIRV_PASSES + +include "mlir/Pass/PassBase.td" + +def SPIRVCompositeTypeLayout : Pass<"decorate-spirv-composite-type-layout"> { + let summary = "Decorate SPIR-V composite type with layout info"; + let constructor = "mlir::spirv::createDecorateSPIRVCompositeTypeLayoutPass()"; +} + +def SPIRVLowerABIAttributes : Pass<"spirv-lower-abi-attrs"> { + let summary = "Lower ABI attributes on SPIR-V entry-point functions"; + let constructor = "mlir::spirv::createLowerABIAttributesPass()"; +} + +def SPIRVUpdateVCE : Pass<"spirv-update-vce"> { + let summary = "Deduce and attach minimal (version, capabilities, extensions) " + "requirements to spv.module ops"; + let constructor = "mlir::spirv::createUpdateVersionCapabilityExtensionPass()"; +} + +#endif // MLIR_DIALECT_SPIRV_PASSES diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index d54791a65410..bb37bb28a18c 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -72,6 +72,15 @@ constexpr StringRef getFunAttrName() { return "fun"; } /// function that implements the structured op.
constexpr StringRef getLibraryCallAttrName() { return "library_call"; } +/// Attribute name for the I64ArrayAttr which encodes the value of strides. +constexpr StringRef getStridesAttrName() { return "strides"; } + +/// Attribute name for the I64ArrayAttr which encodes the value of dilations. +constexpr StringRef getDilationsAttrName() { return "dilations"; } + +/// Attribute name for the I64ElementsAttr which encodes the value of padding. +constexpr StringRef getPaddingAttrName() { return "padding"; } + /// Use to encode that a particular iterator type has parallel semantics. constexpr StringRef getParallelIteratorTypeName() { return "parallel"; } diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index d0629e4ff724..a0ad88347bd9 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -1315,10 +1315,12 @@ def Vector_TransposeOp : VectorType getResultType() { return result().getType().cast<VectorType>(); } + void getTransp(SmallVectorImpl<int64_t> &results); }]; let assemblyFormat = [{ $vector `,` $transp attr-dict `:` type($vector) `to` type($result) }]; + let hasFolder = 1; } def Vector_TupleGetOp : diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index 14deb85fb2f0..ff65da62a115 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -49,9 +49,16 @@ class AffineMap { static AffineMap get(unsigned dimCount, unsigned symbolCount, MLIRContext *context); + /// Returns an affine map with `dimCount` dimensions and `symbolCount` symbols + /// mapping to the given results. The array of results cannot be empty. static AffineMap get(unsigned dimCount, unsigned symbolCount, ArrayRef<AffineExpr> results); + /// Returns an affine map with `dimCount` dimensions and `symbolCount` symbols + /// mapping to the given results, where the number of results can be zero. + static AffineMap get(unsigned dimCount, unsigned symbolCount, + ArrayRef<AffineExpr> results, MLIRContext *context); + /// Returns a single constant result affine map. static AffineMap getConstantMap(int64_t val, MLIRContext *context); @@ -208,9 +215,13 @@ struct MutableAffineMap { MLIRContext *context; }; -/// Simplify an affine map by simplifying its underlying AffineExpr results. +/// Simplifies an affine map by simplifying its underlying AffineExpr results. AffineMap simplifyAffineMap(AffineMap map); +/// Returns a map with the same dimension and symbol count as `map`, but whose +/// results are the unique affine expressions of `map`. +AffineMap removeDuplicateExprs(AffineMap map); + /// Returns a map of codomain to domain dimensions such that the first codomain /// dimension for a particular domain dimension is selected. /// Returns an empty map if the input map is empty or if `map` is not invertible diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index 5b42132d463a..1c6b16f22989 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -188,13 +188,23 @@ class OpBuilder : public Builder { setInsertionPoint(op); } - explicit OpBuilder(Block *block) : OpBuilder(block, block->end()) {} - OpBuilder(Block *block, Block::iterator insertPoint) : OpBuilder(block->getParent()) { setInsertionPoint(block, insertPoint); } + /// Create a builder and set the insertion point to before the first operation + /// in the block but still inside the block.
+ static OpBuilder atBlockBegin(Block *block) { + return OpBuilder(block, block->begin()); + } + + /// Create a builder and set the insertion point to after the last operation + /// in the block but still inside the block. + static OpBuilder atBlockEnd(Block *block) { + return OpBuilder(block, block->end()); + } + /// This class represents a saved insertion point. class InsertPoint { public: diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h index 4671b4081401..08d267a9cf18 100644 --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -22,8 +22,11 @@ #include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h" #include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h" #include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h" +#include "mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h" #include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h" +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h" +#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" #include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/FxpMathOps/Passes.h" #include "mlir/Dialect/GPU/Passes.h" @@ -35,6 +38,8 @@ #include "mlir/Quantizer/Transforms/Passes.h" #include "mlir/Transforms/LocationSnapshot.h" #include "mlir/Transforms/Passes.h" +#include "mlir/Transforms/ViewOpGraph.h" +#include "mlir/Transforms/ViewRegionGraph.h" #include <cstdlib> @@ -48,94 +53,47 @@ namespace mlir { // individual passes. // The global registry is interesting to interact with the command-line tools. inline void registerAllPasses() { - // At the moment we still rely on global initializers for registering passes, - // but we may not do it in the future. - // We must reference the passes in such a way that compilers will not - // delete it all as dead code, even with whole program optimization, - // yet is effectively a NO-OP. As the compiler isn't smart enough - // to know that getenv() never returns -1, this will do the job.
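Returning briefly to the `OpBuilder` factories added in `Builders.h` above: the removed single-argument `OpBuilder(Block *)` constructor implicitly meant "insert at the end", whereas the two named factories spell the position out. A minimal usage sketch:

```cpp
#include "mlir/IR/Builders.h"

static void positionBuilders(mlir::Block *block) {
  // Insert before the first operation of the block.
  mlir::OpBuilder begin = mlir::OpBuilder::atBlockBegin(block);
  // Insert after the last operation of the block (the old implicit default).
  mlir::OpBuilder end = mlir::OpBuilder::atBlockEnd(block);
  (void)begin;
  (void)end;
}
```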
- if (std::getenv("bar") != (char *)-1) - return; - // Init general passes - createCanonicalizerPass(); - createCSEPass(); - createSuperVectorizePass({}); - createLoopUnrollPass(); - createLoopUnrollAndJamPass(); - createSimplifyAffineStructuresPass(); - createLoopFusionPass(); - createLoopInvariantCodeMotionPass(); - createAffineLoopInvariantCodeMotionPass(); - createPipelineDataTransferPass(); - createLowerAffinePass(); - createLoopTilingPass(0); - createLoopCoalescingPass(); - createAffineDataCopyGenerationPass(0, 0); - createMemRefDataFlowOptPass(); - createStripDebugInfoPass(); - createPrintOpStatsPass(); - createInlinerPass(); - createSymbolDCEPass(); - createLocationSnapshotPass({}); - - // AVX512 - createConvertAVX512ToLLVMPass(); - - // GPUtoROCDLPass - createLowerGpuOpsToROCDLOpsPass(); - - // FxpOpsDialect passes - fxpmath::createLowerUniformRealMathPass(); - fxpmath::createLowerUniformCastsPass(); +#define GEN_PASS_REGISTRATION +#include "mlir/Transforms/Passes.h.inc" - // GPU - createGpuKernelOutliningPass(); - createSimpleLoopsToGPUPass(0, 0); - createLoopToGPUPass({}, {}); + // Conversion passes +#define GEN_PASS_REGISTRATION +#include "mlir/Conversion/Passes.h.inc" + + // Affine +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Affine/Passes.h.inc" - // CUDA - createConvertGpuLaunchFuncToCudaCallsPass(); - createLowerGpuOpsToNVVMOpsPass(); + // FxpMath +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/FxpMathOps/Passes.h.inc" + + // GPU +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/GPU/Passes.h.inc" // Linalg - createLinalgFusionPass(); - createLinalgTilingPass(); - createLinalgTilingToParallelLoopsPass(); - createLinalgPromotionPass(0); - createConvertLinalgToLoopsPass(); - createConvertLinalgToParallelLoopsPass(); - createConvertLinalgToAffineLoopsPass(); - createConvertLinalgToLLVMPass(); +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Linalg/Passes.h.inc" // LLVM - LLVM::createLegalizeForExportPass(); +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc" - // LoopOps - createParallelLoopCollapsingPass(); - createParallelLoopFusionPass(); - createParallelLoopSpecializationPass(); - createParallelLoopTilingPass(); + // Loop +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/LoopOps/Passes.h.inc" - // QuantOps - quant::createConvertSimulatedQuantPass(); - quant::createConvertConstPass(); - quantizer::createAddDefaultStatsPass(); - quantizer::createRemoveInstrumentationPass(); - quantizer::registerInferQuantizedTypesPass(); + // Quant +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Quant/Passes.h.inc" +#define GEN_PASS_REGISTRATION +#include "mlir/Quantizer/Transforms/Passes.h.inc" // SPIR-V - spirv::createDecorateSPIRVCompositeTypeLayoutPass(); - spirv::createLowerABIAttributesPass(); - spirv::createUpdateVersionCapabilityExtensionPass(); - createConvertGPUToSPIRVPass(); - createConvertStandardToSPIRVPass(); - createLegalizeStdOpsForSPIRVLoweringPass(); - createLinalgToSPIRVPass(); - - // Vulkan - createConvertGpuLaunchFuncToVulkanLaunchFuncPass(); - createConvertVulkanLaunchFuncToVulkanCallsPass(); +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/SPIRV/Passes.h.inc" } } // namespace mlir diff --git a/mlir/include/mlir/Pass/PassBase.td b/mlir/include/mlir/Pass/PassBase.td new file mode 100644 index 000000000000..29478047f757 --- /dev/null +++ b/mlir/include/mlir/Pass/PassBase.td @@ -0,0 +1,85 @@ +//===-- PassBase.td - Base pass definition file ------------*- tablegen -*-===// +// +// Part of the
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions for defining pass registration and other +// mechanisms. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_PASS_PASSBASE +#define MLIR_PASS_PASSBASE + +//===----------------------------------------------------------------------===// +// Options +//===----------------------------------------------------------------------===// + +class Option<string varName, string arg, string valueType, string default, + string desc, string additionalFlags = ""> { + // The name for the C++ option variable. + string cppName = varName; + + // The command line argument to use for this option. + string argument = arg; + + // The C++ type of the option. + string type = valueType; + + // The default value of the option. "" corresponds to no default. + string defaultValue = default; + + // A description for this option. + string description = desc; + + // A set of additional flags to pass along to the option constructor. + string additionalOptFlags = additionalFlags; +} + +class ListOption<string varName, string arg, string valueType, + string desc, string additionalFlags = ""> + : Option<varName, arg, valueType, /*default=*/"", desc, additionalFlags> {} + +//===----------------------------------------------------------------------===// +// Statistics +//===----------------------------------------------------------------------===// + +class Statistic<string varName, string statName, string desc> { + // The C++ variable name for the statistic. + string cppName = varName; + + // The displayed name of the statistic, similar to the argument of an option. + string name = statName; + + // The description of the statistic. + string description = desc; +} + +//===----------------------------------------------------------------------===// +// Pass +//===----------------------------------------------------------------------===// + +class Pass<string passArg> { + // The command line argument of the pass. + string argument = passArg; + + // A short 1-line summary of the pass. + string summary = ""; + + // A human readable description of the pass. + string description = ""; + + // A C++ constructor call to create an instance of this pass. + code constructor = [{}]; + + // A set of options provided by this pass. + list
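The `Option`/`ListOption` tablegen classes above correspond to the option members a hand-written pass of this era declares directly on `mlir::Pass`. A sketch under that assumption (the pass, its option names, and the pre-`PassWrapper` `FunctionPass` CRTP base are all illustrative, not taken from this patch):

```cpp
#include "mlir/Pass/Pass.h"

namespace {
// Hypothetical pass showing roughly what a tablegen'd Option/ListOption pair
// expands to: plain option members initialized with llvm::cl flags.
struct ExampleTilePass : public mlir::FunctionPass<ExampleTilePass> {
  // ~ ListOption<"tileSizes", "example-tile-sizes", "int64_t", ...>
  ListOption<int64_t> tileSizes{
      *this, "example-tile-sizes", llvm::cl::desc("Tile sizes"),
      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
  // ~ Option<"useHeuristic", "use-heuristic", "bool", /*default=*/"false", ...>
  Option<bool> useHeuristic{*this, "use-heuristic",
                            llvm::cl::desc("Pick tile sizes heuristically"),
                            llvm::cl::init(false)};

  void runOnFunction() override {
    // Transformation body omitted; options read like ordinary values,
    // e.g. iterating tileSizes or testing bool(useHeuristic).
  }
};
} // namespace
```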