From b7da0e982675ef05115117d6c4102172f95cf4aa Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 13 Aug 2024 15:58:36 +0000 Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5-bogner [skip ci] --- .../include/clang-include-cleaner/Types.h | 4 +- .../include-cleaner/lib/Analysis.cpp | 41 +- .../include-cleaner/lib/HTMLReport.cpp | 19 +- .../include-cleaner/lib/Types.cpp | 3 +- .../include-cleaner/tool/IncludeCleaner.cpp | 2 +- .../unittests/AnalysisTest.cpp | 75 +- .../unittests/IncludeSpellerTest.cpp | 3 +- .../include-cleaner/unittests/RecordTest.cpp | 48 +- clang/docs/APINotes.rst | 12 + clang/include/clang/APINotes/Types.h | 7 + clang/lib/APINotes/APINotesFormat.h | 2 +- clang/lib/APINotes/APINotesReader.cpp | 6 + clang/lib/APINotes/APINotesWriter.cpp | 7 + clang/lib/APINotes/APINotesYAMLCompiler.cpp | 4 + clang/lib/Sema/SemaAPINotes.cpp | 4 + .../Inputs/Headers/SwiftImportAs.apinotes | 2 + clang/test/APINotes/swift-import-as.cpp | 2 + .../matrix-index-operator-sign-conversion.cpp | 20 + compiler-rt/lib/builtins/aarch64/sme-abi.S | 4 +- flang/test/Evaluate/fold-out_of_range.f90 | 5 +- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/spec/stdc.td | 1 + libc/src/math/CMakeLists.txt | 1 + libc/src/math/expm1f16.h | 21 + libc/src/math/generic/CMakeLists.txt | 28 +- libc/src/math/generic/expf16.cpp | 61 +- libc/src/math/generic/expm1f16.cpp | 132 ++++ libc/src/math/generic/expxf16.h | 65 ++ libc/test/UnitTest/FPMatcher.h | 32 + libc/test/src/math/CMakeLists.txt | 31 +- libc/test/src/math/expm1f16_test.cpp | 40 ++ libc/test/src/math/smoke/CMakeLists.txt | 21 +- libc/test/src/math/smoke/expm1f16_test.cpp | 108 +++ libcxx/utils/synchronize_csv_status_files.py | 236 +++++++ .../GDBRemoteCommunicationClient.cpp | 7 +- 
.../GDBRemoteCommunicationServerLLGS.cpp | 4 + .../Process/gdb-remote/ProcessGDBRemote.cpp | 3 + .../gdb_remote_client/TestStopPCs.py | 15 +- llvm/include/llvm/IR/CFG.h | 48 ++ .../Utils/SampleProfileLoaderBaseImpl.h | 21 +- llvm/lib/Analysis/MemoryBuiltins.cpp | 3 +- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 53 +- llvm/lib/Analysis/MemorySSA.cpp | 45 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 5 +- llvm/lib/CodeGen/GlobalMerge.cpp | 2 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 22 +- llvm/lib/CodeGen/TargetPassConfig.cpp | 6 +- llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 9 +- llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp | 2 + llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 7 + llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 6 +- .../Transforms/IPO/AttributorAttributes.cpp | 4 +- .../Transforms/IPO/FunctionSpecialization.cpp | 5 - llvm/lib/Transforms/IPO/GlobalOpt.cpp | 5 - .../InstCombine/InstCombineCalls.cpp | 8 +- .../InstCombine/InstCombineCompares.cpp | 2 +- .../InstCombine/InstructionCombining.cpp | 3 +- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 14 +- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 7 +- llvm/lib/Transforms/Utils/GlobalStatus.cpp | 5 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 4 - .../Transforms/Vectorize/LoopVectorize.cpp | 14 +- .../Transforms/Vectorize/SLPVectorizer.cpp | 40 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 37 +- .../Analysis/CostModel/X86/cttz-codesize.ll | 10 +- .../Analysis/CostModel/X86/cttz-latency.ll | 10 +- .../CostModel/X86/cttz-sizelatency.ll | 10 +- .../CostModel/X86/intrinsic-cost-kinds.ll | 6 +- .../ScalarEvolution/different-loops-recs.ll | 656 ++++++++++++------ .../AArch64/arm64ec-hybrid-patchable.ll | 24 +- ...rleaving-reductions-predicated-scalable.ll | 4 +- .../AArch64/intrinsic-cttz-elts-sve.ll | 3 +- llvm/test/CodeGen/AArch64/sve-fcmp.ll | 116 ++++ .../CodeGen/AArch64/sve-fp-int-min-max.ll | 3 +- 
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 26 +- llvm/test/CodeGen/X86/cttz.ll | 12 +- llvm/test/CodeGen/X86/known-never-zero.ll | 339 ++++----- llvm/test/CodeGen/X86/pr89877.ll | 13 +- llvm/test/CodeGen/X86/pr90847.ll | 24 +- .../Transforms/GlobalMerge/private-global.ll | 36 + llvm/test/Transforms/SLPVectorizer/revec.ll | 74 ++ .../Inputs/pseudo-probe-missing-probe.prof | 13 + .../pseudo-probe-missing-probe.ll | 243 +++++++ .../X86/DW_OP_implicit_pointer.yaml | 87 +++ mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 12 +- mlir/include/mlir/IR/PatternMatch.h | 2 +- .../VectorToLLVM/vector-to-llvm.mlir | 91 ++- 90 files changed, 2448 insertions(+), 848 deletions(-) create mode 100644 clang/test/SemaCXX/matrix-index-operator-sign-conversion.cpp create mode 100644 libc/src/math/expm1f16.h create mode 100644 libc/src/math/generic/expm1f16.cpp create mode 100644 libc/test/src/math/expm1f16_test.cpp create mode 100644 libc/test/src/math/smoke/expm1f16_test.cpp create mode 100755 libcxx/utils/synchronize_csv_status_files.py create mode 100644 llvm/test/Transforms/GlobalMerge/private-global.ll create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-missing-probe.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-missing-probe.ll create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/DW_OP_implicit_pointer.yaml diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h index 7d5c19872d5a8..2888e25226755 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h @@ -136,8 +136,8 @@ struct Header { } StringRef verbatim() const { return std::get(Storage); } - /// Absolute path for the header when it's a physical file. Otherwise just - /// the spelling without surrounding quotes/brackets. 
+ /// For physical files, either absolute path or path relative to the execution + /// root. Otherwise just the spelling without surrounding quotes/brackets. llvm::StringRef resolvedPath() const; private: diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp index 68fe79d6929f6..05e9d14734a95 100644 --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -82,7 +82,7 @@ analyze(llvm::ArrayRef ASTRoots, const PragmaIncludes *PI, const Preprocessor &PP, llvm::function_ref HeaderFilter) { auto &SM = PP.getSourceManager(); - const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID()); + const auto MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); llvm::DenseSet Used; llvm::StringSet<> Missing; if (!HeaderFilter) @@ -95,7 +95,7 @@ analyze(llvm::ArrayRef ASTRoots, for (const Header &H : Providers) { if (H.kind() == Header::Physical && (H.physical() == MainFile || - (ResourceDir && H.physical().getDir() == *ResourceDir))) { + H.physical().getDir() == ResourceDir)) { Satisfied = true; } for (const Include *I : Inc.match(H)) { @@ -103,29 +103,30 @@ analyze(llvm::ArrayRef ASTRoots, Satisfied = true; } } - if (!Satisfied && !Providers.empty() && - Ref.RT == RefType::Explicit && - !HeaderFilter(Providers.front().resolvedPath())) { - // Check if we have any headers with the same spelling, in edge - // cases like `#include_next "foo.h"`, the user can't ever - // include the physical foo.h, but can have a spelling that - // refers to it. - auto Spelling = spellHeader( - {Providers.front(), PP.getHeaderSearchInfo(), MainFile}); - for (const Include *I : Inc.match(Header{Spelling})) { - Used.insert(I); - Satisfied = true; - } - if (!Satisfied) - Missing.insert(std::move(Spelling)); + // Bail out if we can't (or need not) insert an include. 
+ if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit) + return; + if (HeaderFilter(Providers.front().resolvedPath())) + return; + // Check if we have any headers with the same spelling, in edge + // cases like `#include_next "foo.h"`, the user can't ever + // include the physical foo.h, but can have a spelling that + // refers to it. + auto Spelling = spellHeader( + {Providers.front(), PP.getHeaderSearchInfo(), MainFile}); + for (const Include *I : Inc.match(Header{Spelling})) { + Used.insert(I); + Satisfied = true; } + if (!Satisfied) + Missing.insert(std::move(Spelling)); }); AnalysisResults Results; for (const Include &I : Inc.all()) { if (Used.contains(&I) || !I.Resolved || - HeaderFilter(I.Resolved->getFileEntry().tryGetRealPathName()) || - (ResourceDir && I.Resolved->getFileEntry().getDir() == *ResourceDir)) + HeaderFilter(I.Resolved->getName()) || + I.Resolved->getDir() == ResourceDir) continue; if (PI) { if (PI->shouldKeep(*I.Resolved)) @@ -137,7 +138,7 @@ analyze(llvm::ArrayRef ASTRoots, // Since most private -> public mappings happen in a verbatim way, we // check textually here. This might go wrong in presence of symlinks or // header mappings. But that's not different than rest of the places. 
- if (MainFile->tryGetRealPathName().ends_with(PHeader)) + if (MainFile.getName().ends_with(PHeader)) continue; } } diff --git a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp index 195f658a0af92..bbe8bc230c6e2 100644 --- a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp +++ b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AnalysisInternal.h" +#include "clang-include-cleaner/IncludeSpeller.h" #include "clang-include-cleaner/Types.h" #include "clang/AST/ASTContext.h" #include "clang/AST/PrettyPrinter.h" @@ -167,22 +168,6 @@ class Reporter { return "semiused"; } - std::string spellHeader(const Header &H) { - switch (H.kind()) { - case Header::Physical: { - bool IsAngled = false; - std::string Path = HS.suggestPathToFileForDiagnostics( - H.physical(), MainFE->tryGetRealPathName(), &IsAngled); - return IsAngled ? "<" + Path + ">" : "\"" + Path + "\""; - } - case Header::Standard: - return H.standard().name().str(); - case Header::Verbatim: - return H.verbatim().str(); - } - llvm_unreachable("Unknown Header kind"); - } - void fillTarget(Ref &R) { // Duplicates logic from walkUsed(), which doesn't expose SymbolLocations. 
for (auto &Loc : locateSymbol(R.Sym)) @@ -204,7 +189,7 @@ class Reporter { R.Includes.end()); if (!R.Headers.empty()) - R.Insert = spellHeader(R.Headers.front()); + R.Insert = spellHeader({R.Headers.front(), HS, MainFE}); } public: diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp index cb8a55ed13e5d..7a637639edf8b 100644 --- a/clang-tools-extra/include-cleaner/lib/Types.cpp +++ b/clang-tools-extra/include-cleaner/lib/Types.cpp @@ -10,7 +10,6 @@ #include "TypesInternal.h" #include "clang/AST/Decl.h" #include "clang/Basic/FileEntry.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -48,7 +47,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) { llvm::StringRef Header::resolvedPath() const { switch (kind()) { case include_cleaner::Header::Physical: - return physical().getFileEntry().tryGetRealPathName(); + return physical().getName(); case include_cleaner::Header::Standard: return standard().name().trim("<>\""); case include_cleaner::Header::Verbatim: diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp index 3bc449b0152bb..d8a44ab9b6e12 100644 --- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp +++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp @@ -164,7 +164,7 @@ class Action : public clang::ASTFrontendAction { writeHTML(); llvm::StringRef Path = - SM.getFileEntryForID(SM.getMainFileID())->tryGetRealPathName(); + SM.getFileEntryRefForID(SM.getMainFileID())->getName(); assert(!Path.empty() && "Main file path not known?"); llvm::StringRef Code = SM.getBufferData(SM.getMainFileID()); diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp index 5696c380758f8..43634ee8f2d80 100644 --- 
a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -22,9 +22,12 @@ #include "clang/Testing/TestAST.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Testing/Annotations/Annotations.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -204,21 +207,37 @@ class AnalyzeTest : public testing::Test { TestInputs Inputs; PragmaIncludes PI; RecordedPP PP; + llvm::IntrusiveRefCntPtr ExtraFS = nullptr; + AnalyzeTest() { Inputs.MakeAction = [this] { struct Hook : public SyntaxOnlyAction { public: - Hook(RecordedPP &PP, PragmaIncludes &PI) : PP(PP), PI(PI) {} + Hook(RecordedPP &PP, PragmaIncludes &PI, + llvm::IntrusiveRefCntPtr ExtraFS) + : PP(PP), PI(PI), ExtraFS(std::move(ExtraFS)) {} bool BeginSourceFileAction(clang::CompilerInstance &CI) override { CI.getPreprocessor().addPPCallbacks(PP.record(CI.getPreprocessor())); PI.record(CI); return true; } + bool BeginInvocation(CompilerInstance &CI) override { + if (!ExtraFS) + return true; + auto OverlayFS = + llvm::makeIntrusiveRefCnt( + CI.getFileManager().getVirtualFileSystemPtr()); + OverlayFS->pushOverlay(ExtraFS); + CI.getFileManager().setVirtualFileSystem(std::move(OverlayFS)); + return true; + } + RecordedPP &PP; PragmaIncludes &PI; + llvm::IntrusiveRefCntPtr ExtraFS; }; - return std::make_unique(PP, PI); + return std::make_unique(PP, PI, ExtraFS); }; } }; @@ -322,6 +341,58 @@ TEST_F(AnalyzeTest, DifferentHeaderSameSpelling) { EXPECT_THAT(Results.Missing, testing::IsEmpty()); } +TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) { + llvm::Annotations Code(R"cpp( + #include "header.h" + void $bar^bar() { + $foo^foo(); + } + )cpp"); + Inputs.Code = Code.code(); + 
ExtraFS = llvm::makeIntrusiveRefCnt(); + ExtraFS->addFile("content_for/0", /*ModificationTime=*/{}, + llvm::MemoryBuffer::getMemBufferCopy(guard(R"cpp( + #include "inner.h" + )cpp"))); + ExtraFS->addSymbolicLink("header.h", "content_for/0", + /*ModificationTime=*/{}); + ExtraFS->addFile("content_for/1", /*ModificationTime=*/{}, + llvm::MemoryBuffer::getMemBufferCopy(guard(R"cpp( + void foo(); + )cpp"))); + ExtraFS->addSymbolicLink("inner.h", "content_for/1", + /*ModificationTime=*/{}); + + TestAST AST(Inputs); + std::vector DeclsInTU; + for (auto *D : AST.context().getTranslationUnitDecl()->decls()) + DeclsInTU.push_back(D); + auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor()); + // Check that we're spelling header using the symlink, and not underlying + // path. + EXPECT_THAT(Results.Missing, testing::ElementsAre("\"inner.h\"")); + // header.h should be unused. + EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); + + { + // Make sure filtering is also applied to symlink, not underlying file. + auto HeaderFilter = [](llvm::StringRef Path) { return Path == "inner.h"; }; + Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), + HeaderFilter); + EXPECT_THAT(Results.Missing, testing::ElementsAre("\"inner.h\"")); + // header.h should be unused. + EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); + } + { + auto HeaderFilter = [](llvm::StringRef Path) { return Path == "header.h"; }; + Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), + HeaderFilter); + // header.h should be ignored now. 
+ EXPECT_THAT(Results.Unused, Not(testing::IsEmpty())); + EXPECT_THAT(Results.Missing, testing::ElementsAre("\"inner.h\"")); + } +} + TEST(FixIncludes, Basic) { llvm::StringRef Code = R"cpp(#include "d.h" #include "a.h" diff --git a/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp b/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp index 8f6ad09c46cc4..a27e83a434372 100644 --- a/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp @@ -47,8 +47,7 @@ class DummyIncludeSpeller : public IncludeSpeller { return ""; if (Input.H.kind() != Header::Physical) return ""; - llvm::StringRef AbsolutePath = - Input.H.physical().getFileEntry().tryGetRealPathName(); + llvm::StringRef AbsolutePath = Input.H.resolvedPath(); std::string RootWithSeparator{testRoot()}; RootWithSeparator += llvm::sys::path::get_separator(); if (!AbsolutePath.consume_front(llvm::StringRef{RootWithSeparator})) diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index 1a5996e5df284..715d95eb57346 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -10,6 +10,7 @@ #include "clang-include-cleaner/Types.h" #include "clang/AST/Decl.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileEntry.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/Frontend/CompilerInvocation.h" @@ -24,6 +25,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Annotations/Annotations.h" @@ -53,9 +55,11 @@ MATCHER_P(named, N, "") { } MATCHER_P(FileNamed, N, "") { - if (arg.getFileEntry().tryGetRealPathName() == N) 
+ llvm::StringRef ActualName = + llvm::sys::path::remove_leading_dotslash(arg.getName()); + if (ActualName == N) return true; - *result_listener << arg.getFileEntry().tryGetRealPathName().str(); + *result_listener << ActualName.str(); return false; } @@ -317,7 +321,8 @@ class PragmaIncludeTest : public ::testing::Test { } TestAST build(bool ResetPragmaIncludes = true) { - if (ResetPragmaIncludes) PI = PragmaIncludes(); + if (ResetPragmaIncludes) + PI = PragmaIncludes(); return TestAST(Inputs); } @@ -535,16 +540,33 @@ TEST_F(PragmaIncludeTest, IWYUExportBlock) { TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_THAT(PI.getExporters(FM.getFile("private1.h").get(), FM), - testing::UnorderedElementsAre(FileNamed("export1.h"), - FileNamed("normal.h"))); - EXPECT_THAT(PI.getExporters(FM.getFile("private2.h").get(), FM), - testing::UnorderedElementsAre(FileNamed("export1.h"))); - EXPECT_THAT(PI.getExporters(FM.getFile("private3.h").get(), FM), - testing::UnorderedElementsAre(FileNamed("export1.h"))); - - EXPECT_TRUE(PI.getExporters(FM.getFile("foo.h").get(), FM).empty()); - EXPECT_TRUE(PI.getExporters(FM.getFile("bar.h").get(), FM).empty()); + auto GetNames = [](llvm::ArrayRef FEs) { + std::string Result; + llvm::raw_string_ostream OS(Result); + for (auto &FE : FEs) { + OS << FE.getName() << " "; + } + OS.flush(); + return Result; + }; + auto Exporters = PI.getExporters(FM.getFile("private1.h").get(), FM); + EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"), + FileNamed("normal.h"))) + << GetNames(Exporters); + + Exporters = PI.getExporters(FM.getFile("private2.h").get(), FM); + EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) + << GetNames(Exporters); + + Exporters = PI.getExporters(FM.getFile("private3.h").get(), FM); + EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) + << GetNames(Exporters); + + Exporters = PI.getExporters(FM.getFile("foo.h").get(), FM); + 
EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); + + Exporters = PI.getExporters(FM.getFile("bar.h").get(), FM); + EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); } TEST_F(PragmaIncludeTest, SelfContained) { diff --git a/clang/docs/APINotes.rst b/clang/docs/APINotes.rst index bc09b16bab5d2..dcefa6810dac6 100644 --- a/clang/docs/APINotes.rst +++ b/clang/docs/APINotes.rst @@ -188,6 +188,18 @@ declaration kind), all of which are optional: - Name: tzdb SwiftCopyable: false +:SwiftConformsTo: + + Allows annotating a C++ class as conforming to a Swift protocol. Equivalent + to ``SWIFT_CONFORMS_TO_PROTOCOL``. The value is a module-qualified name of a + Swift protocol. + + :: + + Tags: + - Name: vector + SwiftConformsTo: Cxx.CxxSequence + :Availability, AvailabilityMsg: A value of "nonswift" is equivalent to ``NS_SWIFT_UNAVAILABLE``. A value of diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index c8e5e4df25d17..f972d0cf26640 100644 --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -685,6 +685,9 @@ class TagInfo : public CommonTypeInfo { std::optional SwiftRetainOp; std::optional SwiftReleaseOp; + /// The Swift protocol that this type should be automatically conformed to. 
+ std::optional SwiftConformance; + std::optional EnumExtensibility; TagInfo() @@ -720,6 +723,9 @@ class TagInfo : public CommonTypeInfo { if (!SwiftReleaseOp) SwiftReleaseOp = RHS.SwiftReleaseOp; + if (!SwiftConformance) + SwiftConformance = RHS.SwiftConformance; + if (!HasFlagEnum) setFlagEnum(RHS.isFlagEnum()); @@ -742,6 +748,7 @@ inline bool operator==(const TagInfo &LHS, const TagInfo &RHS) { LHS.SwiftImportAs == RHS.SwiftImportAs && LHS.SwiftRetainOp == RHS.SwiftRetainOp && LHS.SwiftReleaseOp == RHS.SwiftReleaseOp && + LHS.SwiftConformance == RHS.SwiftConformance && LHS.isFlagEnum() == RHS.isFlagEnum() && LHS.isSwiftCopyable() == RHS.isSwiftCopyable() && LHS.EnumExtensibility == RHS.EnumExtensibility; diff --git a/clang/lib/APINotes/APINotesFormat.h b/clang/lib/APINotes/APINotesFormat.h index 9d254dcc1c9ef..fba5f4e8907da 100644 --- a/clang/lib/APINotes/APINotesFormat.h +++ b/clang/lib/APINotes/APINotesFormat.h @@ -24,7 +24,7 @@ const uint16_t VERSION_MAJOR = 0; /// API notes file minor version number. /// /// When the format changes IN ANY WAY, this number should be incremented. 
-const uint16_t VERSION_MINOR = 28; // nested tags +const uint16_t VERSION_MINOR = 29; // SwiftConformsTo const uint8_t kSwiftCopyable = 1; const uint8_t kSwiftNonCopyable = 2; diff --git a/clang/lib/APINotes/APINotesReader.cpp b/clang/lib/APINotes/APINotesReader.cpp index 871f782511d5f..c05fdffe4a071 100644 --- a/clang/lib/APINotes/APINotesReader.cpp +++ b/clang/lib/APINotes/APINotesReader.cpp @@ -572,6 +572,12 @@ class TagTableInfo ReleaseOpLength - 1); Data += ReleaseOpLength - 1; } + if (unsigned ConformanceLength = + endian::readNext(Data)) { + Info.SwiftConformance = std::string(reinterpret_cast(Data), + ConformanceLength - 1); + Data += ConformanceLength - 1; + } ReadCommonTypeInfo(Data, Info); return Info; diff --git a/clang/lib/APINotes/APINotesWriter.cpp b/clang/lib/APINotes/APINotesWriter.cpp index 2a71922746ac5..cf3a0bee393ee 100644 --- a/clang/lib/APINotes/APINotesWriter.cpp +++ b/clang/lib/APINotes/APINotesWriter.cpp @@ -1189,6 +1189,7 @@ class TagTableInfo : public CommonTypeTableInfo { return 2 + (TI.SwiftImportAs ? TI.SwiftImportAs->size() : 0) + 2 + (TI.SwiftRetainOp ? TI.SwiftRetainOp->size() : 0) + 2 + (TI.SwiftReleaseOp ? TI.SwiftReleaseOp->size() : 0) + + 2 + (TI.SwiftConformance ? 
TI.SwiftConformance->size() : 0) + 2 + getCommonTypeInfoSize(TI); } @@ -1230,6 +1231,12 @@ class TagTableInfo : public CommonTypeTableInfo { } else { writer.write(0); } + if (auto Conformance = TI.SwiftConformance) { + writer.write(Conformance->size() + 1); + OS.write(Conformance->c_str(), Conformance->size()); + } else { + writer.write(0); + } emitCommonTypeInfo(OS, TI); } diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp index 11cccc94a15f0..2205686c4d15c 100644 --- a/clang/lib/APINotes/APINotesYAMLCompiler.cpp +++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp @@ -419,6 +419,7 @@ struct Tag { std::optional SwiftImportAs; std::optional SwiftRetainOp; std::optional SwiftReleaseOp; + std::optional SwiftConformance; std::optional EnumExtensibility; std::optional FlagEnum; std::optional EnumConvenienceKind; @@ -456,6 +457,7 @@ template <> struct MappingTraits { IO.mapOptional("SwiftImportAs", T.SwiftImportAs); IO.mapOptional("SwiftReleaseOp", T.SwiftReleaseOp); IO.mapOptional("SwiftRetainOp", T.SwiftRetainOp); + IO.mapOptional("SwiftConformsTo", T.SwiftConformance); IO.mapOptional("EnumExtensibility", T.EnumExtensibility); IO.mapOptional("FlagEnum", T.FlagEnum); IO.mapOptional("EnumKind", T.EnumConvenienceKind); @@ -920,6 +922,8 @@ class YAMLConverter { TI.SwiftRetainOp = T.SwiftRetainOp; if (T.SwiftReleaseOp) TI.SwiftReleaseOp = T.SwiftReleaseOp; + if (T.SwiftConformance) + TI.SwiftConformance = T.SwiftConformance; if (T.SwiftCopyable) TI.setSwiftCopyable(T.SwiftCopyable); diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 2c49c1f64b2da..65b56bd1c8efc 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -605,6 +605,10 @@ static void ProcessAPINotes(Sema &S, TagDecl *D, const api_notes::TagInfo &Info, D->addAttr( SwiftAttrAttr::Create(S.Context, "release:" + ReleaseOp.value())); + if (auto ConformsTo = Info.SwiftConformance) + D->addAttr( + 
SwiftAttrAttr::Create(S.Context, "conforms_to:" + ConformsTo.value())); + if (auto Copyable = Info.isSwiftCopyable()) { if (!*Copyable) D->addAttr(SwiftAttrAttr::Create(S.Context, "~Copyable")); diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes index b0eead42869a4..f4f9c7a244e0a 100644 --- a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes +++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes @@ -7,7 +7,9 @@ Tags: SwiftImportAs: reference SwiftReleaseOp: RCRelease SwiftRetainOp: RCRetain + SwiftConformsTo: MySwiftModule.MySwiftRefCountedProtocol - Name: NonCopyableType SwiftCopyable: false + SwiftConformsTo: MySwiftModule.MySwiftNonCopyableProtocol - Name: CopyableType SwiftCopyable: true diff --git a/clang/test/APINotes/swift-import-as.cpp b/clang/test/APINotes/swift-import-as.cpp index 62e6450e94e11..6457e1557618d 100644 --- a/clang/test/APINotes/swift-import-as.cpp +++ b/clang/test/APINotes/swift-import-as.cpp @@ -16,9 +16,11 @@ // CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <> "import_reference" // CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <> "retain:RCRetain" // CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <> "release:RCRelease" +// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <> "conforms_to:MySwiftModule.MySwiftRefCountedProtocol" // CHECK-NON-COPYABLE: Dumping NonCopyableType: // CHECK-NON-COPYABLE-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct NonCopyableType +// CHECK-NON-COPYABLE: SwiftAttrAttr {{.+}} <> "conforms_to:MySwiftModule.MySwiftNonCopyableProtocol" // CHECK-NON-COPYABLE: SwiftAttrAttr {{.+}} <> "~Copyable" // CHECK-COPYABLE: Dumping CopyableType: diff --git a/clang/test/SemaCXX/matrix-index-operator-sign-conversion.cpp b/clang/test/SemaCXX/matrix-index-operator-sign-conversion.cpp new file mode 100644 index 0000000000000..4254780651c5f --- /dev/null +++ b/clang/test/SemaCXX/matrix-index-operator-sign-conversion.cpp @@ -0,0 +1,20 @@ +// RUN: 
%clang_cc1 -triple arm64-apple-macosx -std=c++11 -fenable-matrix -fsyntax-only -verify -Wsign-conversion %s + +template using m __attribute__((__matrix_type__(R,C))) = T; + +// FIXME: should not warn here. +double index1(m X, int i) { return X[i][0]; } +// expected-warning@-1 {{implicit conversion changes signedness: 'int' to 'unsigned long'}} + +double index2(m X, unsigned i) { return X[i][0]; } + +double index3(m X, char i) { return X[i][0]; } +// expected-warning@-1 {{implicit conversion changes signedness: 'char' to 'unsigned long'}} + +double index4(m X, int i) { return X[0][i]; } +// expected-warning@-1 {{implicit conversion changes signedness: 'int' to 'unsigned long'}} + +double index5(m X, unsigned i) { return X[0][i]; } + +double index6(m X, char i) { return X[0][i]; } +// expected-warning@-1 {{implicit conversion changes signedness: 'char' to 'unsigned long'}} diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index cd8153f60670f..3e9bd2c23b2fc 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -110,7 +110,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) - .variant_pcs __arm_tpidr2_restore + .variant_pcs __arm_tpidr2_save BTI_C // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. @@ -151,7 +151,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) - .variant_pcs __arm_tpidr2_restore + .variant_pcs __arm_za_disable BTI_C // If the current thread does not have access to SME, the subroutine does // nothing. 
diff --git a/flang/test/Evaluate/fold-out_of_range.f90 b/flang/test/Evaluate/fold-out_of_range.f90 index 81551255135d2..5a9f900beb2d5 100644 --- a/flang/test/Evaluate/fold-out_of_range.f90 +++ b/flang/test/Evaluate/fold-out_of_range.f90 @@ -1,5 +1,6 @@ -! RUN: %python %S/test_folding.py %s %flang_fc1 -pedantic -! UNSUPPORTED: target=powerpc{{.*}}, target=aarch{{.*}}, target=arm{{.*}}, system-windows, system-solaris +! RUN: %python %S/test_folding.py %s %flang_fc1 -pedantic -triple x86_64-unknown-linux-gnu +! UNSUPPORTED: system-windows +! REQUIRES: target=x86_64{{.*}} ! Tests folding of OUT_OF_RANGE(). module m integer(1), parameter :: i1v(*) = [ -huge(1_1) - 1_1, huge(1_1) ] diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 748401e4cf8ee..65c5757efe627 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -597,6 +597,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.exp10f16 libc.src.math.exp2f16 libc.src.math.expf16 + libc.src.math.expm1f16 libc.src.math.f16add libc.src.math.f16addf libc.src.math.f16addl diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index b617289d1364f..185d2d440849a 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -294,7 +294,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | exp2m1 | |check| | | | | | 7.12.6.5 | F.10.3.5 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| expm1 | |check| | |check| | | | | 7.12.6.6 | F.10.3.6 | +| expm1 | |check| | |check| | | |check| | | 7.12.6.6 | F.10.3.6 | 
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fma | |check| | |check| | | | | 7.12.13.1 | F.10.10.1 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 077f66e78c116..e06a4f9b268e6 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -599,6 +599,7 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"expm1", RetValSpec, [ArgSpec]>, FunctionSpec<"expm1f", RetValSpec, [ArgSpec]>, + GuardedFunctionSpec<"expm1f16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, FunctionSpec<"exp10", RetValSpec, [ArgSpec]>, FunctionSpec<"exp10f", RetValSpec, [ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 9f13205a60702..132511a536366 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -121,6 +121,7 @@ add_math_entrypoint_object(exp10f16) add_math_entrypoint_object(expm1) add_math_entrypoint_object(expm1f) +add_math_entrypoint_object(expm1f16) add_math_entrypoint_object(f16add) add_math_entrypoint_object(f16addf) diff --git a/libc/src/math/expm1f16.h b/libc/src/math/expm1f16.h new file mode 100644 index 0000000000000..644e6cddd7666 --- /dev/null +++ b/libc/src/math/expm1f16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for expm1f16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_EXPM1F16_H +#define LLVM_LIBC_SRC_MATH_EXPM1F16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 expm1f16(float16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_EXPM1F16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 79ba07616e507..faa910139fa35 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1359,14 +1359,12 @@ add_entrypoint_object( HDRS ../expf16.h DEPENDS + .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros - libc.src.__support.CPP.array libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization @@ -1608,6 +1606,27 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + expm1f16 + SRCS + expm1f16.cpp + HDRS + ../expm1f16.h + DEPENDS + .expxf16 + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( powf SRCS @@ -5092,4 +5111,7 @@ add_header_library( expxf16.h DEPENDS libc.src.__support.CPP.array + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval ) diff --git a/libc/src/math/generic/expf16.cpp b/libc/src/math/generic/expf16.cpp 
index b198c559dfedb..7ffdbd5191008 100644 --- a/libc/src/math/generic/expf16.cpp +++ b/libc/src/math/generic/expf16.cpp @@ -7,15 +7,13 @@ //===----------------------------------------------------------------------===// #include "src/math/expf16.h" +#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" -#include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" @@ -41,28 +39,6 @@ static constexpr fputil::ExceptValues EXPF16_EXCEPTS_HI = {{ {0xa954U, 0x3bacU, 1U, 0U, 0U}, }}; -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from -18 to 12 do print(round(exp(i), SG, RN)); -static constexpr cpp::array EXP_HI = { - 0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f, - 0x1.be6c7p-21f, 0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f, - 0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f, 0x1.de16bap-11f, - 0x1.44e52p-9f, 0x1.b993fep-8f, 0x1.2c155cp-6f, 0x1.97db0cp-5f, - 0x1.152aaap-3f, 0x1.78b564p-2f, 0x1p+0f, 0x1.5bf0a8p+1f, - 0x1.d8e64cp+2f, 0x1.415e5cp+4f, 0x1.b4c902p+5f, 0x1.28d38ap+7f, - 0x1.936dc6p+8f, 0x1.122886p+10f, 0x1.749ea8p+11f, 0x1.fa7158p+12f, - 0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f, -}; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN)); -static constexpr cpp::array EXP_MID = { - 0x1p+0f, 0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f, - 0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f, -}; - LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) { using FPBits = fputil::FPBits; FPBits 
x_bits(x); @@ -135,38 +111,9 @@ LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) { if (auto r = EXPF16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) return r.value(); - // For -18 < x < 12, to compute exp(x), we perform the following range - // reduction: find hi, mid, lo, such that: - // x = hi + mid + lo, in which - // hi is an integer, - // mid * 2^3 is an integer, - // -2^(-4) <= lo < 2^(-4). - // In particular, - // hi + mid = round(x * 2^3) * 2^(-3). - // Then, - // exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo). - // We store exp(hi) and exp(mid) in the lookup tables EXP_HI and EXP_MID - // respectively. exp(lo) is computed using a degree-3 minimax polynomial - // generated by Sollya. - - float xf = x; - float kf = fputil::nearest_integer(xf * 0x1.0p+3f); - int x_hi_mid = static_cast(kf); - int x_hi = x_hi_mid >> 3; - int x_mid = x_hi_mid & 0x7; - // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x - float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf); - - float exp_hi = EXP_HI[x_hi + 18]; - float exp_mid = EXP_MID[x_mid]; - // Degree-3 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-4, 2^-4]); - // > 1 + x * P; - float exp_lo = - fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.001p-1f, 0x1.555ddep-3f); - return static_cast(exp_hi * exp_mid * exp_lo); + // exp(x) = exp(hi + mid) * exp(lo) + auto [exp_hi_mid, exp_lo] = exp_range_reduction(x); + return static_cast(exp_hi_mid * exp_lo); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/expm1f16.cpp b/libc/src/math/generic/expm1f16.cpp new file mode 100644 index 0000000000000..0facdc510e428 --- /dev/null +++ b/libc/src/math/generic/expm1f16.cpp @@ -0,0 +1,132 @@ +//===-- Half-precision e^x - 1 function -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/expm1f16.h" +#include "expxf16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +static constexpr fputil::ExceptValues EXPM1F16_EXCEPTS_LO = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.564p-5, expm1f16(x) = 0x1.5d4p-5 (RZ) + {0x2959U, 0x2975U, 1U, 0U, 1U}, +}}; + +#ifdef LIBC_TARGET_CPU_HAS_FMA +static constexpr size_t N_EXPM1F16_EXCEPTS_HI = 2; +#else +static constexpr size_t N_EXPM1F16_EXCEPTS_HI = 3; +#endif + +static constexpr fputil::ExceptValues + EXPM1F16_EXCEPTS_HI = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.c34p+0, expm1f16(x) = 0x1.34cp+2 (RZ) + {0x3f0dU, 0x44d3U, 1U, 0U, 1U}, + // x = -0x1.e28p-3, expm1f16(x) = -0x1.adcp-3 (RZ) + {0xb38aU, 0xb2b7U, 0U, 1U, 1U}, +#ifndef LIBC_TARGET_CPU_HAS_FMA + // x = 0x1.a08p-3, expm1f16(x) = 0x1.cdcp-3 (RZ) + {0x3282U, 0x3337U, 1U, 0U, 0U}, +#endif + }}; + +LLVM_LIBC_FUNCTION(float16, expm1f16, (float16 x)) { + using FPBits = fputil::FPBits; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| <= 2^(-3), or |x| >= 11 * log(2), or x is NaN.
+ if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x47a0U)) { + // expm1(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // expm1(+/-0) = +/-0 + if (x_abs == 0) + return x; + + // When x >= 16 * log(2). + if (x_bits.is_pos() && x_abs >= 0x498cU) { + // expm1(+inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + + // When x <= -11 * log(2). + if (x_u >= 0xc7a0U) { + // expm1(-inf) = -1 + if (x_bits.is_inf()) + return FPBits::one(Sign::NEG).get_val(); + + // When x < -0x1.0ap+3, round(expm1(x), HP, RN) = -1. + if (x_u > 0xc828U) + return fputil::round_result_slightly_up( + FPBits::one(Sign::NEG).get_val()); + // When x >= -0x1.0ap+3, round(expm1(x), HP, RN) = -0x1.ffcp-1. + return fputil::round_result_slightly_down( + static_cast(-0x1.ffcp-1)); + } + + // When 0 < |x| <= 2^(-3).
+ if (x_abs <= 0x3000U && !x_bits.is_zero()) { + if (auto r = EXPM1F16_EXCEPTS_LO.lookup(x_u); + LIBC_UNLIKELY(r.has_value())) + return r.value(); + + float xf = x; + // Degree-5 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax(expm1(x)/x, 4, [|SG...|], [-2^-3, 2^-3]); + // > x * P; + return static_cast( + xf * fputil::polyeval(xf, 0x1p+0f, 0x1.fffff8p-2f, 0x1.555556p-3f, + 0x1.55905ep-5f, 0x1.1124c2p-7f)); + } + } + + if (auto r = EXPM1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + + // exp(x) = exp(hi + mid) * exp(lo) + auto [exp_hi_mid, exp_lo] = exp_range_reduction(x); + // expm1(x) = exp(hi + mid) * exp(lo) - 1 + return static_cast(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0f)); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h index c33aca337b98d..53815e0e27553 100644 --- a/libc/src/math/generic/expxf16.h +++ b/libc/src/math/generic/expxf16.h @@ -10,11 +10,76 @@ #define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H #include "src/__support/CPP/array.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/macros/config.h" #include namespace LIBC_NAMESPACE_DECL { +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > for i from -18 to 12 do print(round(exp(i), SG, RN)); +static constexpr cpp::array EXP_HI = { + 0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f, + 0x1.be6c7p-21f, 0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f, + 0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f, 0x1.de16bap-11f, + 0x1.44e52p-9f, 0x1.b993fep-8f, 0x1.2c155cp-6f, 0x1.97db0cp-5f, + 0x1.152aaap-3f, 0x1.78b564p-2f, 0x1p+0f, 0x1.5bf0a8p+1f, + 0x1.d8e64cp+2f, 0x1.415e5cp+4f, 0x1.b4c902p+5f, 0x1.28d38ap+7f, + 0x1.936dc6p+8f, 0x1.122886p+10f, 0x1.749ea8p+11f, 
0x1.fa7158p+12f, + 0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f, +}; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN)); +static constexpr cpp::array EXP_MID = { + 0x1p+0f, 0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f, + 0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f, +}; + +struct ExpRangeReduction { + float exp_hi_mid; + float exp_lo; +}; + +ExpRangeReduction exp_range_reduction(float16 x) { + // For -18 < x < 12, to compute exp(x), we perform the following range + // reduction: find hi, mid, lo, such that: + // x = hi + mid + lo, in which + // hi is an integer, + // mid * 2^3 is an integer, + // -2^(-4) <= lo < 2^(-4). + // In particular, + // hi + mid = round(x * 2^3) * 2^(-3). + // Then, + // exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo). + // We store exp(hi) and exp(mid) in the lookup tables EXP_HI and EXP_MID + // respectively. exp(lo) is computed using a degree-3 minimax polynomial + // generated by Sollya. 
+ + float xf = x; + float kf = fputil::nearest_integer(xf * 0x1.0p+3f); + int x_hi_mid = static_cast(kf); + int x_hi = x_hi_mid >> 3; + int x_mid = x_hi_mid & 0x7; + // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x + float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf); + + float exp_hi = EXP_HI[x_hi + 18]; + float exp_mid = EXP_MID[x_mid]; + // Degree-3 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-4, 2^-4]); + // > 1 + x * P; + float exp_lo = + fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.001p-1f, 0x1.555ddep-3f); + return {exp_hi * exp_mid, exp_lo}; +} + // Generated by Sollya with the following commands: // > display = hexadecimal; // > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN)); diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h index 2749908ef1849..43752a4942ad5 100644 --- a/libc/test/UnitTest/FPMatcher.h +++ b/libc/test/UnitTest/FPMatcher.h @@ -234,4 +234,36 @@ template struct FPTest : public Test { #define EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO(expected, actual) \ EXPECT_FP_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::TowardZero) +#define EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_MODE( \ + expected, actual, expected_except, rounding_mode) \ + do { \ + using namespace LIBC_NAMESPACE::fputil::testing; \ + ForceRoundingMode __r((rounding_mode)); \ + if (__r.success) { \ + LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT); \ + EXPECT_FP_EQ((expected), (actual)); \ + EXPECT_FP_EXCEPTION(expected_except); \ + } \ + } while (0) + +#define EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST(expected, actual, \ + expected_except) \ + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_MODE( \ + (expected), (actual), (expected_except), RoundingMode::Nearest) + +#define EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD(expected, actual, \ + expected_except) \ + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_MODE( \ + (expected), (actual), 
(expected_except), RoundingMode::Upward) + +#define EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD(expected, actual, \ + expected_except) \ + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_MODE( \ + (expected), (actual), (expected_except), RoundingMode::Downward) + +#define EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO(expected, actual, \ + expected_except) \ + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_MODE( \ + (expected), (actual), (expected_except), RoundingMode::TowardZero) + #endif // LLVM_LIBC_TEST_UNITTEST_FPMATCHER_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index a8da72fb01be6..0c4118c369454 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1676,6 +1676,19 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + expm1_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + expm1_test.cpp + DEPENDS + libc.src.errno.errno + libc.src.math.expm1 + libc.src.__support.FPUtil.fp_bits +) + add_fp_unittest( expm1f_test NEED_MPFR @@ -1690,16 +1703,14 @@ add_fp_unittest( ) add_fp_unittest( - expm1_test - NEED_MPFR - SUITE - libc-math-unittests - SRCS - expm1_test.cpp - DEPENDS - libc.src.errno.errno - libc.src.math.expm1 - libc.src.__support.FPUtil.fp_bits + expm1f16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + expm1f16_test.cpp + DEPENDS + libc.src.math.expm1f16 ) add_fp_unittest( diff --git a/libc/test/src/math/expm1f16_test.cpp b/libc/test/src/math/expm1f16_test.cpp new file mode 100644 index 0000000000000..a6a6fcf73d383 --- /dev/null +++ b/libc/test/src/math/expm1f16_test.cpp @@ -0,0 +1,40 @@ +//===-- Exhaustive test for expm1f16 --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/expm1f16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcExpm1f16Test = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +// Range: [0, Inf]; +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7c00U; + +// Range: [-Inf, 0]; +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xfc00U; + +TEST_F(LlvmLibcExpm1f16Test, PositiveRange) { + for (uint16_t v = POS_START; v <= POS_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, + LIBC_NAMESPACE::expm1f16(x), 0.5); + } +} + +TEST_F(LlvmLibcExpm1f16Test, NegativeRange) { + for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, + LIBC_NAMESPACE::expm1f16(x), 0.5); + } +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 460797b74a13e..7271e933b9311 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3324,6 +3324,18 @@ add_fp_unittest( libc.src.math.fma ) +add_fp_unittest( + expm1_test + SUITE + libc-math-smoke-tests + SRCS + expm1_test.cpp + DEPENDS + libc.src.errno.errno + libc.src.math.expm1 + libc.src.__support.FPUtil.fp_bits +) + add_fp_unittest( expm1f_test SUITE @@ -3337,15 +3349,16 @@ add_fp_unittest( ) add_fp_unittest( - expm1_test + expm1f16_test SUITE libc-math-smoke-tests SRCS - expm1_test.cpp + expm1f16_test.cpp DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros libc.src.errno.errno - libc.src.math.expm1 - libc.src.__support.FPUtil.fp_bits + libc.src.math.expm1f16 ) add_fp_unittest( diff --git 
a/libc/test/src/math/smoke/expm1f16_test.cpp b/libc/test/src/math/smoke/expm1f16_test.cpp new file mode 100644 index 0000000000000..3bdbaad227941 --- /dev/null +++ b/libc/test/src/math/smoke/expm1f16_test.cpp @@ -0,0 +1,108 @@ +//===-- Unittests for expm1f16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/errno/libc_errno.h" +#include "src/math/expm1f16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcExpm1f16Test = LIBC_NAMESPACE::testing::FPTest; + +TEST_F(LlvmLibcExpm1f16Test, SpecialNumbers) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::expm1f16(aNaN)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::expm1f16(sNaN), FE_INVALID); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::expm1f16(inf)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(static_cast(-1.0), + LIBC_NAMESPACE::expm1f16(neg_inf)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::expm1f16(zero)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::expm1f16(neg_zero)); + EXPECT_MATH_ERRNO(0); +} + +TEST_F(LlvmLibcExpm1f16Test, Overflow) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::expm1f16(max_normal), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + // round(16 * log(2), HP, RN); + float16 x = static_cast(0x1.63p+3); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST(inf, LIBC_NAMESPACE::expm1f16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + 
EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD(inf, LIBC_NAMESPACE::expm1f16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( + max_normal, LIBC_NAMESPACE::expm1f16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( + max_normal, LIBC_NAMESPACE::expm1f16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); +} + +TEST_F(LlvmLibcExpm1f16Test, ResultNearNegOne) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_WITH_EXCEPTION(static_cast(-1.0), + LIBC_NAMESPACE::expm1f16(neg_max_normal), + FE_INEXACT); + + // round(-11 * log(2), HP, RN); + float16 x = static_cast(-0x1.e8p+2); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST( + static_cast(-0x1.ffcp-1), LIBC_NAMESPACE::expm1f16(x), + FE_INEXACT); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD(static_cast(-0x1.ffcp-1), + LIBC_NAMESPACE::expm1f16(x), + FE_INEXACT); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( + static_cast(-1.0), LIBC_NAMESPACE::expm1f16(x), FE_INEXACT); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( + static_cast(-0x1.ffcp-1), LIBC_NAMESPACE::expm1f16(x), + FE_INEXACT); + + x = static_cast(-0x1.0a4p+3); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST( + static_cast(-1.0), LIBC_NAMESPACE::expm1f16(x), FE_INEXACT); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD(static_cast(-0x1.ffcp-1), + LIBC_NAMESPACE::expm1f16(x), + FE_INEXACT); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( + static_cast(-1.0), LIBC_NAMESPACE::expm1f16(x), FE_INEXACT); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( + static_cast(-0x1.ffcp-1), LIBC_NAMESPACE::expm1f16(x), + FE_INEXACT); +} diff --git a/libcxx/utils/synchronize_csv_status_files.py b/libcxx/utils/synchronize_csv_status_files.py new file mode 100755 index 0000000000000..b44b02f5304c0 --- /dev/null +++ b/libcxx/utils/synchronize_csv_status_files.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +# 
===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +from typing import List, Dict, Tuple, Optional +import csv +import itertools +import json +import os +import pathlib +import re +import subprocess + +# Number of the 'Libc++ Standards Conformance' project on Github +LIBCXX_CONFORMANCE_PROJECT = '31' + +class PaperInfo: + paper_number: str + """ + Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'. + """ + + paper_name: str + """ + Plain text string representing the name of the paper. + """ + + meeting: Optional[str] + """ + Plain text string representing the meeting at which the paper/issue was voted. + """ + + status: Optional[str] + """ + Status of the paper/issue. This must be '|Complete|', '|Nothing To Do|', '|In Progress|', + '|Partial|' or 'Resolved by '. + """ + + first_released_version: Optional[str] + """ + First version of LLVM in which this paper/issue was resolved. + """ + + labels: Optional[List[str]] + """ + List of labels to associate to the issue in the status-tracking table. Supported labels are + 'format', 'ranges', 'spaceship', 'flat_containers', 'concurrency TS' and 'DR'. + """ + + original: Optional[object] + """ + Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that + was used to generate this PaperInfo and is useful for error reporting purposes. 
+ """ + + def __init__(self, paper_number: str, paper_name: str, + meeting: Optional[str] = None, + status: Optional[str] = None, + first_released_version: Optional[str] = None, + labels: Optional[List[str]] = None, + original: Optional[object] = None): + self.paper_number = paper_number + self.paper_name = paper_name + self.meeting = meeting + self.status = status + self.first_released_version = first_released_version + self.labels = labels + self.original = original + + def for_printing(self) -> Tuple[str, str, str, str, str, str]: + return ( + f'`{self.paper_number} `__', + self.paper_name, + self.meeting if self.meeting is not None else '', + self.status if self.status is not None else '', + self.first_released_version if self.first_released_version is not None else '', + ' '.join(f'|{label}|' for label in self.labels) if self.labels is not None else '', + ) + + def __repr__(self) -> str: + return repr(self.original) if self.original is not None else repr(self.for_printing()) + + def is_implemented(self) -> bool: + if self.status is None: + return False + if re.search(r'(in progress|partial)', self.status.lower()): + return False + return True + + @staticmethod + def from_csv_row(row: Tuple[str, str, str, str, str, str]):# -> PaperInfo: + """ + Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row. 
+ """ + # Extract the paper number from the first column + match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0]) + if match is None: + raise RuntimeError(f"Can't parse paper/issue number out of row: {row}") + + return PaperInfo( + paper_number=match.group(1), + paper_name=row[1], + meeting=row[2] or None, + status=row[3] or None, + first_released_version=row[4] or None, + labels=[l.strip('|') for l in row[5].split(' ') if l] or None, + original=row, + ) + + @staticmethod + def from_github_issue(issue: Dict):# -> PaperInfo: + """ + Create a PaperInfo object from the Github issue information obtained from querying a Github Project. + """ + # Extract the paper number from the issue title + match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", issue['title']) + if match is None: + raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}") + paper = match.group(1) + + # Figure out the status of the paper according to the Github project information. + # + # Sadly, we can't make a finer-grained distinction about *how* the issue + # was closed (such as Nothing To Do or similar).
+ status = '|Complete|' if 'status' in issue and issue['status'] == 'Done' else None + + # Handle labels + valid_labels = ('format', 'ranges', 'spaceship', 'flat_containers', 'concurrency TS', 'DR') + labels = [label for label in issue['labels'] if label in valid_labels] + + return PaperInfo( + paper_number=paper, + paper_name=issue['title'], + meeting=issue.get('meeting Voted', None), + status=status, + first_released_version=None, # TODO + labels=labels if labels else None, + original=issue, + ) + +def load_csv(file: pathlib.Path) -> List[Tuple]: + rows = [] + with open(file, newline='') as f: + reader = csv.reader(f, delimiter=',') + for row in reader: + rows.append(row) + return rows + +def write_csv(output: pathlib.Path, rows: List[Tuple]): + with open(output, 'w', newline='') as f: + writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n') + for row in rows: + writer.writerow(row) + +def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]: + """ + Given a list of CSV rows representing an existing status file and a list of PaperInfos representing + up-to-date (but potentially incomplete) tracking information from Github, this function returns the + new CSV rows synchronized with the up-to-date information. + + Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date + PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented, + it is an error (i.e. the result is not a CSV row where the paper is *not* implemented). + """ + results = [rows[0]] # Start with the header + for row in rows[1:]: # Skip the header + # If the row contains empty entries, this is a "separator row" between meetings. + # Preserve it as-is. 
+ if row[0] == "": + results.append(row) + continue + + paper = PaperInfo.from_csv_row(row) + + # If the row is already implemented, basically keep it unchanged but also validate that we're not + # out-of-sync with any still-open Github issue tracking the same paper. + if paper.is_implemented(): + dangling = [gh for gh in from_github if gh.paper_number == paper.paper_number and not gh.is_implemented()] + if dangling: + raise RuntimeError(f"We found the following open tracking issues for a row which is already marked as implemented:\nrow: {row}\ntracking issues: {dangling}") + results.append(paper.for_printing()) + else: + # Find any Github issues tracking this paper + tracking = [gh for gh in from_github if paper.paper_number == gh.paper_number] + + # If there is no tracking issue for that row in the CSV, this is an error since we're + # missing a Github issue. + if not tracking: + raise RuntimeError(f"Can't find any Github issue for CSV row which isn't marked as done yet: {row}") + + # If there's more than one tracking issue, something is weird too. + if len(tracking) > 1: + raise RuntimeError(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}") + + # If the issue is closed, synchronize the row based on the Github issue. Otherwise, use the + # existing CSV row as-is. + results.append(tracking[0].for_printing() if tracking[0].is_implemented() else row) + + return results + +CSV_FILES_TO_SYNC = [ + 'Cxx14Issues.csv', + 'Cxx17Issues.csv', + 'Cxx17Papers.csv', + 'Cxx20Issues.csv', + 'Cxx20Papers.csv', + # TODO: The Github issues are not created yet. + # 'Cxx23Issues.csv', + # 'Cxx23Papers.csv', + # 'Cxx2cIssues.csv', + # 'Cxx2cPapers.csv', +] + +def main(): + libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + # Extract the list of PaperInfos from issues we're tracking on Github. 
+ print("Loading all issues from Github") + gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999'] + project_info = json.loads(subprocess.check_output(gh_command_line)) + from_github = [PaperInfo.from_github_issue(i) for i in project_info['items']] + + for filename in CSV_FILES_TO_SYNC: + print(f"Synchronizing {filename} with Github issues") + file = libcxx_root / 'docs' / 'Status' / filename + csv = load_csv(file) + synced = sync_csv(csv, from_github) + write_csv(file, synced) + +if __name__ == '__main__': + main() diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 74e392249a94e..83ba27783da47 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -352,8 +352,11 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() { // build the qSupported packet std::vector features = {"xmlRegisters=i386,arm,mips,arc", - "multiprocess+", "fork-events+", - "vfork-events+"}; + "multiprocess+", + "fork-events+", + "vfork-events+", + "swbreak+", + "hwbreak+"}; StreamString packet; packet.PutCString("qSupported"); for (uint32_t i = 0; i < features.size(); ++i) { diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index a0b08a219ae14..345f5cd5de849 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -4245,6 +4245,10 @@ std::vector GDBRemoteCommunicationServerLLGS::HandleFeatures( .Case("vfork-events+", Extension::vfork) .Default({}); + // We consume lldb's swbreak/hwbreak feature, but it doesn't change the + // behaviour of lldb-server. 
We always adjust the program counter for targets + // like x86 + m_extensions_supported &= plugin_features; // fork & vfork require multiprocess diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 6f9c2cc1e4b4e..c7ce368ab41ce 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -2354,6 +2354,9 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor &stop_packet) { if (!key.getAsInteger(16, reg)) expedited_register_map[reg] = std::string(std::move(value)); } + // swbreak and hwbreak are also expected keys, but we don't need to + // change our behaviour for them because lldb always expects the remote + // to adjust the program counter (if relevant, e.g., for x86 targets) } if (stop_pid != LLDB_INVALID_PROCESS_ID && stop_pid != pid) { diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestStopPCs.py b/lldb/test/API/functionalities/gdb_remote_client/TestStopPCs.py index ef28cc95f7ad4..3faae5fec38ba 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestStopPCs.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestStopPCs.py @@ -10,13 +10,17 @@ class TestStopPCs(GDBRemoteTestBase): def test(self): class MyResponder(MockGDBServerResponder): def haltReason(self): - return "T02thread:1ff0d;threads:1ff0d,2ff0d;thread-pcs:10001bc00,10002bc00;" + # lldb should treat the default halt reason, hwbreak and swbreak in the same way. Which is that it + # expects the stub to have corrected the PC already, so lldb should not modify it further. 
+ return "T02thread:1ff0d;threads:1ff0d,2ff0d,3ff0d;thread-pcs:10001bc00,10002bc00,10003bc00;" def threadStopInfo(self, threadnum): if threadnum == 0x1FF0D: - return "T02thread:1ff0d;threads:1ff0d,2ff0d;thread-pcs:10001bc00,10002bc00;" + return "T02thread:1ff0d;threads:1ff0d,2ff0d,3ff0d;thread-pcs:10001bc00,10002bc00,10003bc00;" if threadnum == 0x2FF0D: - return "T00thread:2ff0d;threads:1ff0d,2ff0d;thread-pcs:10001bc00,10002bc00;" + return "T00swbreak:;thread:2ff0d;threads:1ff0d,2ff0d,3ff0d;thread-pcs:10001bc00,10002bc00,10003bc00;" + if threadnum == 0x3FF0D: + return "T00hwbreak:;thread:3ff0d;threads:1ff0d,2ff0d,3ff0d;thread-pcs:10001bc00,10002bc00,10003bc00;" def qXferRead(self, obj, annex, offset, length): if annex == "target.xml": @@ -40,10 +44,13 @@ def qXferRead(self, obj, annex, offset, length): self.addTearDownHook(lambda: self.runCmd("log disable gdb-remote packets")) process = self.connect(target) - self.assertEqual(process.GetNumThreads(), 2) + self.assertEqual(process.GetNumThreads(), 3) th0 = process.GetThreadAtIndex(0) th1 = process.GetThreadAtIndex(1) + th2 = process.GetThreadAtIndex(2) self.assertEqual(th0.GetThreadID(), 0x1FF0D) self.assertEqual(th1.GetThreadID(), 0x2FF0D) + self.assertEqual(th2.GetThreadID(), 0x3FF0D) self.assertEqual(th0.GetFrameAtIndex(0).GetPC(), 0x10001BC00) self.assertEqual(th1.GetFrameAtIndex(0).GetPC(), 0x10002BC00) + self.assertEqual(th2.GetFrameAtIndex(0).GetPC(), 0x10003BC00) diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h index 12ca1b1b9aa97..f8ec0971517a9 100644 --- a/llvm/include/llvm/IR/CFG.h +++ b/llvm/include/llvm/IR/CFG.h @@ -304,8 +304,13 @@ template <> struct GraphTraits { static NodeRef getEntryNode(BasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); } static ChildIteratorType child_end(NodeRef N) { return succ_end(N); } + + static unsigned getNumber(const BasicBlock *BB) { return BB->getNumber(); } }; +static_assert(GraphHasNodeNumbers, + 
"GraphTraits getNumber() not detected"); + template <> struct GraphTraits { using NodeRef = const BasicBlock *; using ChildIteratorType = const_succ_iterator; @@ -314,8 +319,13 @@ template <> struct GraphTraits { static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); } static ChildIteratorType child_end(NodeRef N) { return succ_end(N); } + + static unsigned getNumber(const BasicBlock *BB) { return BB->getNumber(); } }; +static_assert(GraphHasNodeNumbers, + "GraphTraits getNumber() not detected"); + // Provide specializations of GraphTraits to be able to treat a function as a // graph of basic blocks... and to walk it in inverse order. Inverse order for // a function is considered to be when traversing the predecessor edges of a BB @@ -328,8 +338,13 @@ template <> struct GraphTraits> { static NodeRef getEntryNode(Inverse G) { return G.Graph; } static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } + + static unsigned getNumber(const BasicBlock *BB) { return BB->getNumber(); } }; +static_assert(GraphHasNodeNumbers>, + "GraphTraits getNumber() not detected"); + template <> struct GraphTraits> { using NodeRef = const BasicBlock *; using ChildIteratorType = const_pred_iterator; @@ -337,8 +352,13 @@ template <> struct GraphTraits> { static NodeRef getEntryNode(Inverse G) { return G.Graph; } static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } + + static unsigned getNumber(const BasicBlock *BB) { return BB->getNumber(); } }; +static_assert(GraphHasNodeNumbers>, + "GraphTraits getNumber() not detected"); + //===--------------------------------------------------------------------===// // GraphTraits specializations for function basic block graphs (CFGs) //===--------------------------------------------------------------------===// @@ -362,6 +382,13 @@ template <> struct 
GraphTraits : public GraphTraits { } static size_t size(Function *F) { return F->size(); } + + static unsigned getMaxNumber(const Function *F) { + return F->getMaxBlockNumber(); + } + static unsigned getNumberEpoch(const Function *F) { + return F->getBlockNumberEpoch(); + } }; template <> struct GraphTraits : public GraphTraits { @@ -379,6 +406,13 @@ template <> struct GraphTraits : } static size_t size(const Function *F) { return F->size(); } + + static unsigned getMaxNumber(const Function *F) { + return F->getMaxBlockNumber(); + } + static unsigned getNumberEpoch(const Function *F) { + return F->getBlockNumberEpoch(); + } }; // Provide specializations of GraphTraits to be able to treat a function as a @@ -391,12 +425,26 @@ template <> struct GraphTraits> : static NodeRef getEntryNode(Inverse G) { return &G.Graph->getEntryBlock(); } + + static unsigned getMaxNumber(const Function *F) { + return F->getMaxBlockNumber(); + } + static unsigned getNumberEpoch(const Function *F) { + return F->getBlockNumberEpoch(); + } }; template <> struct GraphTraits> : public GraphTraits> { static NodeRef getEntryNode(Inverse G) { return &G.Graph->getEntryBlock(); } + + static unsigned getMaxNumber(const Function *F) { + return F->getMaxBlockNumber(); + } + static unsigned getNumberEpoch(const Function *F) { + return F->getBlockNumberEpoch(); + } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 32bf7b8c96be3..5132dca7c6a22 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -449,9 +449,6 @@ SampleProfileLoaderBaseImpl::getInstWeightImpl(const InstructionT &Inst) { return R; } -// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight -// of non-probe instruction. 
So if all instructions of the BB give error_code, -// tell the inference algorithm to infer the BB weight. template ErrorOr SampleProfileLoaderBaseImpl::getProbeWeight(const InstructionT &Inst) { @@ -464,17 +461,13 @@ SampleProfileLoaderBaseImpl::getProbeWeight(const InstructionT &Inst) { return std::error_code(); const FunctionSamples *FS = findFunctionSamples(Inst); - // If none of the instruction has FunctionSample, we choose to return zero - // value sample to indicate the BB is cold. This could happen when the - // instruction is from inlinee and no profile data is found. - // FIXME: This should not be affected by the source drift issue as 1) if the - // newly added function is top-level inliner, it won't match the CFG checksum - // in the function profile or 2) if it's the inlinee, the inlinee should have - // a profile, otherwise it wouldn't be inlined. For non-probe based profile, - // we can improve it by adding a switch for profile-sample-block-accurate for - // block level counts in the future. - if (!FS) - return 0; + if (!FS) { + // If we can't find the function samples for a probe, it could be that the + // probe was later optimized away or the inlining context is mismatched. We + // treat it as unknown, leaving it to profile inference instead of forcing a + // zero count. + return std::error_code(); + } auto R = FS->findSamplesAt(Probe->Id, Probe->Discriminator); if (R) { diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index 53ed60f51fde0..e1abf5e4d885e 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -660,7 +660,8 @@ Value *llvm::lowerObjectSizeCall( if (!MustSucceed) return nullptr; - return ConstantInt::get(ResultType, MaxVal ? -1ULL : 0); + return MaxVal ? 
Constant::getAllOnesValue(ResultType) + : Constant::getNullValue(ResultType); } STATISTIC(ObjectVisitorArgument, diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 9f7baa983f122..79504ca7b73c8 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -291,10 +291,6 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, if (isa(LoadOperand)) return MemDepResult::getUnknown(); - // Queue to process all pointers that are equivalent to load operand. - SmallVector LoadOperandsQueue; - LoadOperandsQueue.push_back(LoadOperand); - Instruction *ClosestDependency = nullptr; // Order of instructions in uses list is unpredictible. In order to always // get the same result, we will look for the closest dominance. @@ -305,44 +301,19 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, return Best; }; - // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case - // we will see all the instructions. This should be fixed in MSSA. - while (!LoadOperandsQueue.empty()) { - const Value *Ptr = LoadOperandsQueue.pop_back_val(); - assert(Ptr && !isa(Ptr) && - "Null or GlobalValue should not be inserted"); - - for (const Use &Us : Ptr->uses()) { - auto *U = dyn_cast(Us.getUser()); - if (!U || U == LI || !DT.dominates(U, LI)) - continue; - - // Bitcast or gep with zeros are using Ptr. Add to queue to check it's - // users. U = bitcast Ptr - if (isa(U)) { - LoadOperandsQueue.push_back(U); - continue; - } - // Gep with zeros is equivalent to bitcast. - // FIXME: we are not sure if some bitcast should be canonicalized to gep 0 - // or gep 0 to bitcast because of SROA, so there are 2 forms. When - // typeless pointers will be ready then both cases will be gone - // (and this BFS also won't be needed). 
- if (auto *GEP = dyn_cast(U)) - if (GEP->hasAllZeroIndices()) { - LoadOperandsQueue.push_back(U); - continue; - } + for (const Use &Us : LoadOperand->uses()) { + auto *U = dyn_cast(Us.getUser()); + if (!U || U == LI || !DT.dominates(U, LI)) + continue; - // If we hit load/store with the same invariant.group metadata (and the - // same pointer operand) we can assume that value pointed by pointer - // operand didn't change. - if ((isa(U) || - (isa(U) && - cast(U)->getPointerOperand() == Ptr)) && - U->hasMetadata(LLVMContext::MD_invariant_group)) - ClosestDependency = GetClosestDependency(ClosestDependency, U); - } + // If we hit load/store with the same invariant.group metadata (and the + // same pointer operand) we can assume that value pointed by pointer + // operand didn't change. + if ((isa(U) || + (isa(U) && + cast(U)->getPointerOperand() == LoadOperand)) && + U->hasMetadata(LLVMContext::MD_invariant_group)) + ClosestDependency = GetClosestDependency(ClosestDependency, U); } if (!ClosestDependency) diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index 48ef73e59045e..1583e0e31efc1 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -2507,45 +2507,22 @@ getInvariantGroupClobberingInstruction(Instruction &I, DominatorTree &DT) { if (isa(PointerOperand)) return nullptr; - // Queue to process all pointers that are equivalent to load operand. - SmallVector PointerUsesQueue; - PointerUsesQueue.push_back(PointerOperand); - const Instruction *MostDominatingInstruction = &I; - // FIXME: This loop is O(n^2) because dominates can be O(n) and in worst case - // we will see all the instructions. It may not matter in practice. If it - // does, we will have to support MemorySSA construction and updates. 
- while (!PointerUsesQueue.empty()) { - const Value *Ptr = PointerUsesQueue.pop_back_val(); - assert(Ptr && !isa(Ptr) && - "Null or GlobalValue should not be inserted"); - - for (const User *Us : Ptr->users()) { - auto *U = dyn_cast(Us); - if (!U || U == &I || !DT.dominates(U, MostDominatingInstruction)) - continue; - - // Add bitcasts and zero GEPs to queue. - if (isa(U)) { - PointerUsesQueue.push_back(U); - continue; - } - if (auto *GEP = dyn_cast(U)) { - if (GEP->hasAllZeroIndices()) - PointerUsesQueue.push_back(U); - continue; - } + for (const User *Us : PointerOperand->users()) { + auto *U = dyn_cast(Us); + if (!U || U == &I || !DT.dominates(U, MostDominatingInstruction)) + continue; - // If we hit a load/store with an invariant.group metadata and the same - // pointer operand, we can assume that value pointed to by the pointer - // operand didn't change. - if (U->hasMetadata(LLVMContext::MD_invariant_group) && - getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) { - MostDominatingInstruction = U; - } + // If we hit a load/store with an invariant.group metadata and the same + // pointer operand, we can assume that value pointed to by the pointer + // operand didn't change. + if (U->hasMetadata(LLVMContext::MD_invariant_group) && + getLoadStorePointerOperand(U) == PointerOperand && !U->isVolatile()) { + MostDominatingInstruction = U; } } + return MostDominatingInstruction == &I ? 
nullptr : MostDominatingInstruction; } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 0169a0e466d87..219c60eab04f5 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3339,7 +3339,7 @@ void IRTranslator::finishPendingPhis() { #ifndef NDEBUG DILocationVerifier Verifier; GISelObserverWrapper WrapperObserver(&Verifier); - RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver); + RAIIMFObsDelInstaller ObsInstall(*MF, WrapperObserver); #endif // ifndef NDEBUG for (auto &Phi : PendingPHIs) { const PHINode *PI = Phi.first; @@ -3966,8 +3966,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { DILocationVerifier Verifier; WrapperObserver.addObserver(&Verifier); #endif // ifndef NDEBUG - RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver); - RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver); + RAIIMFObsDelInstaller ObsInstall(*MF, WrapperObserver); for (const BasicBlock *BB : RPOT) { MachineBasicBlock &MBB = getMBB(*BB); // Set the insertion point of all the following translations to diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 65bf7161441ba..8aa4345cfd6df 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -664,7 +664,7 @@ bool GlobalMergeImpl::run(Module &M) { continue; if (!(Opt.MergeExternal && GV.hasExternalLinkage()) && - !GV.hasInternalLinkage()) + !GV.hasLocalLinkage()) continue; PointerType *PT = dyn_cast(GV.getType()); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c3a7df5361cd4..6ed77fc8d8f17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5140,12 +5140,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, if (Op.getOpcode() == ISD::FREEZE) return true; - // TODO: 
Assume we don't know anything for now. EVT VT = Op.getValueType(); - if (VT.isScalableVector()) - return false; - - APInt DemandedElts = VT.isVector() + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth); @@ -5190,6 +5186,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, } return true; + case ISD::SPLAT_VECTOR: + return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly, + Depth + 1); + case ISD::VECTOR_SHUFFLE: { APInt DemandedLHS, DemandedRHS; auto *SVN = cast(Op); @@ -5236,12 +5236,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { - // TODO: Assume we don't know anything for now. EVT VT = Op.getValueType(); - if (VT.isScalableVector()) - return true; - - APInt DemandedElts = VT.isVector() + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags, @@ -5251,11 +5247,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly, bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { - // TODO: Assume we don't know anything for now. 
- EVT VT = Op.getValueType(); - if (VT.isScalableVector()) - return true; - if (ConsiderFlags && Op->hasPoisonGeneratingFlags()) return true; @@ -5292,6 +5283,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::BITCAST: case ISD::BUILD_VECTOR: case ISD::BUILD_PAIR: + case ISD::SPLAT_VECTOR: return false; case ISD::SELECT_CC: diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 79e240d62a17d..1b0012b65b80d 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -944,13 +944,13 @@ void TargetPassConfig::addCodeGenPrepare() { void TargetPassConfig::addISelPrepare() { addPreISel(); - if (getOptLevel() != CodeGenOptLevel::None) - addPass(createObjCARCContractPass()); - // Force codegen to run according to the callgraph. if (requiresCodeGenSCCOrder()) addPass(new DummyCGSCCPass); + if (getOptLevel() != CodeGenOptLevel::None) + addPass(createObjCARCContractPass()); + addPass(createCallBrPass()); // Add both the safe stack and the stack protection passes: each of them will diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index 00c2823cee0af..ec7af792efb06 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -113,11 +113,10 @@ static void dumpExpression(raw_ostream &OS, DIDumpOptions DumpOpts, ArrayRef Data, bool IsLittleEndian, unsigned AddressSize, DWARFUnit *U) { DWARFDataExtractor Extractor(Data, IsLittleEndian, AddressSize); - // Note. We do not pass any format to DWARFExpression, even if the - // corresponding unit is known. For now, there is only one operation, - // DW_OP_call_ref, which depends on the format; it is rarely used, and - // is unexpected in location tables. 
- DWARFExpression(Extractor, AddressSize).print(OS, DumpOpts, U); + std::optional Format; + if (U) + Format = U->getFormat(); + DWARFExpression(Extractor, AddressSize, Format).print(OS, DumpOpts, U); } bool DWARFLocationTable::dumpLocationList( diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp index b90addbfba04a..2ae5ff3efc8c5 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -90,6 +90,8 @@ static std::vector getOpDescriptions() { Descriptions[DW_OP_implicit_value] = Desc(Op::Dwarf4, Op::SizeLEB, Op::SizeBlock); Descriptions[DW_OP_stack_value] = Desc(Op::Dwarf4); + Descriptions[DW_OP_implicit_pointer] = + Desc(Op::Dwarf5, Op::SizeRefAddr, Op::SignedSizeLEB); Descriptions[DW_OP_addrx] = Desc(Op::Dwarf5, Op::SizeLEB); Descriptions[DW_OP_constx] = Desc(Op::Dwarf5, Op::SizeLEB); Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB); diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 0391d51832431..b8f9b58a21644 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1310,6 +1310,13 @@ void AArch64AsmPrinter::emitGlobalAlias(const Module &M, StringRef ExpStr = cast(Node->getOperand(0))->getString(); MCSymbol *ExpSym = MMI->getContext().getOrCreateSymbol(ExpStr); MCSymbol *Sym = MMI->getContext().getOrCreateSymbol(GA.getName()); + + OutStreamer->beginCOFFSymbolDef(ExpSym); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->endCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(Sym); OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2074fac857891..04dfd0ea0d893 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -412,6 +412,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); if (Subtarget.is64Bit()) { + setOperationPromotedToType(ISD::CTTZ , MVT::i32, MVT::i64); setOperationAction(ISD::CTTZ , MVT::i64 , Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); } @@ -3237,9 +3238,10 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT, } bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const { - // Speculate cttz only if we can directly use TZCNT or can promote to i32. + // Speculate cttz only if we can directly use TZCNT or can promote to i32/i64. return Subtarget.hasBMI() || - (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32); + (!Ty->isVectorTy() && + Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 
64u : 32u)); } bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 99ec50aa4775c..452fff7898d0e 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -371,7 +371,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM, append_range(Worklist, Arg.users()); while (!Worklist.empty()) { Value *V = Worklist.pop_back_val(); - if (isa(V) || isa(V)) { + if (isa(V)) { DeadInsts.push_back(cast(V)); append_range(Worklist, V->users()); continue; @@ -608,10 +608,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, while (!Worklist.empty()) { const Use *U = Worklist.pop_back_val(); Value *V = U->getUser(); - if (isa(V)) { - AppendUses(V); - continue; - } if (auto *GEP = dyn_cast(V)) { if (!GEP->hasAllConstantIndices()) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index db5e94806e9a1..8ece5bbdfc77e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -2327,8 +2327,8 @@ struct AANoFreeFloating : AANoFreeImpl { DepClassTy::REQUIRED, IsKnown); } - if (isa(UserI) || isa(UserI) || - isa(UserI) || isa(UserI)) { + if (isa(UserI) || isa(UserI) || + isa(UserI)) { Follow = true; return true; } diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 2d7b7355229ea..548335d750e33 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -481,11 +481,6 @@ Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca, // the usage in the CallInst, which is what we check here. 
if (User == Call) continue; - if (auto *Bitcast = dyn_cast(User)) { - if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call) - return nullptr; - continue; - } if (auto *Store = dyn_cast(User)) { // This is a duplicate store, bail out. diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 5293a777496bc..aae4926e027ff 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1050,11 +1050,6 @@ valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI, continue; // Otherwise, storing through it, or storing into GV... fine. } - if (auto *BCI = dyn_cast(U)) { - Worklist.push_back(BCI); - continue; - } - if (auto *GEPI = dyn_cast(U)) { Worklist.push_back(GEPI); continue; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a38c990b9ea83..4f9a5bd2c17f0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -104,7 +104,7 @@ static Type *getPromotedType(Type *Ty) { /// requires a deeper change to allow either unread or unwritten objects. static bool hasUndefSource(AnyMemTransferInst *MI) { auto *Src = MI->getRawSource(); - while (isa(Src) || isa(Src)) { + while (isa(Src)) { if (!Src->hasOneUse()) return false; Src = cast(Src)->getOperand(0); @@ -260,13 +260,11 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. - Value *Dest = MI->getDest(); // Extract the fill value and store. 
- const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - Constant *FillVal = ConstantInt::get(ITy, Fill); + Constant *FillVal = ConstantInt::get( + MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue())); StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile()); S->copyMetadata(*MI, LLVMContext::MD_DIAssignID); auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 10a89b47e0753..2b0347073b7d1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -312,7 +312,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( DL.getTypeAllocSize(Init->getType()->getArrayElementType()); auto MaskIdx = [&](Value *Idx) { if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) { - Value *Mask = ConstantInt::get(Idx->getType(), -1); + Value *Mask = Constant::getAllOnesValue(Idx->getType()); Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize)); Idx = Builder.CreateAnd(Idx, Mask); } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 0fb8b639c97b9..877e4f591af5a 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4707,8 +4707,7 @@ static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { pushUsers(*AI); while (!AllocaUsers.empty()) { auto *UserI = cast(AllocaUsers.pop_back_val()); - if (isa(UserI) || isa(UserI) || - isa(UserI)) { + if (isa(UserI) || isa(UserI)) { pushUsers(*UserI); continue; } diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 3abf3aa5542c2..613597b087881 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ 
b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -357,19 +357,19 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { // Insert new integer induction variable. PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator()); - NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), + NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); NewPHI->setDebugLoc(PN->getDebugLoc()); - Instruction *NewAdd = - BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), - Incr->getName() + ".int", Incr->getIterator()); + Instruction *NewAdd = BinaryOperator::CreateAdd( + NewPHI, ConstantInt::getSigned(Int32Ty, IncValue), + Incr->getName() + ".int", Incr->getIterator()); NewAdd->setDebugLoc(Incr->getDebugLoc()); NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); - ICmpInst *NewCompare = - new ICmpInst(TheBr->getIterator(), NewPred, NewAdd, - ConstantInt::get(Int32Ty, ExitValue), Compare->getName()); + ICmpInst *NewCompare = new ICmpInst( + TheBr->getIterator(), NewPred, NewAdd, + ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName()); NewCompare->setDebugLoc(Compare->getDebugLoc()); // In the following deletions, PN may become dead and may be deleted. 
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index cee34f0a6da1f..1d779128e454c 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -980,12 +980,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, while (!srcUseList.empty()) { User *U = srcUseList.pop_back_val(); - if (isa(U) || isa(U)) { - append_range(srcUseList, U->users()); - continue; - } - if (const auto *G = dyn_cast(U); - G && G->hasAllZeroIndices()) { + if (isa(U)) { append_range(srcUseList, U->users()); continue; } diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp index b177e048faae0..0b3016a86e287 100644 --- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -143,9 +143,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, GS.StoredType = GlobalStatus::Stored; } } - } else if (isa(I) || isa(I) || - isa(I)) { - // Skip over bitcasts and GEPs; we don't care about the type or offset + } else if (isa(I) || isa(I)) { + // Skip over GEPs; we don't care about the type or offset // of the pointer. if (analyzeGlobalAux(I, GS, VisitedUsers)) return true; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ccdfe47ef81e7..37c8761ca9383 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7652,10 +7652,6 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu } } - // Look through bitcasts. - if (BitCastInst *BC = dyn_cast(Use)) - return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified); - // Load from null is undefined. 
if (LoadInst *LI = dyn_cast(Use)) if (!LI->isVolatile()) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0d1262fa18729..f5337b11edc97 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3139,12 +3139,10 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { return WideningDecision != CM_GatherScatter; }; - // A helper that returns true if the given value is a bitcast or - // getelementptr instruction contained in the loop. - auto isLoopVaryingBitCastOrGEP = [&](Value *V) { - return ((isa(V) && V->getType()->isPointerTy()) || - isa(V)) && - !TheLoop->isLoopInvariant(V); + // A helper that returns true if the given value is a getelementptr + // instruction contained in the loop. + auto isLoopVaryingGEP = [&](Value *V) { + return isa(V) && !TheLoop->isLoopInvariant(V); }; // A helper that evaluates a memory access's use of a pointer. If the use will @@ -3154,7 +3152,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) { // We only care about bitcast and getelementptr instructions contained in // the loop. 
- if (!isLoopVaryingBitCastOrGEP(Ptr)) + if (!isLoopVaryingGEP(Ptr)) return; // If the pointer has already been identified as scalar (e.g., if it was @@ -3220,7 +3218,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { unsigned Idx = 0; while (Idx != Worklist.size()) { Instruction *Dst = Worklist[Idx++]; - if (!isLoopVaryingBitCastOrGEP(Dst->getOperand(0))) + if (!isLoopVaryingGEP(Dst->getOperand(0))) continue; auto *Src = cast(Dst->getOperand(0)); if (llvm::all_of(Src->users(), [&](User *U) -> bool { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d1946313addb5..ebfb11f841086 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9527,8 +9527,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, // that the costs will be accurate. auto It = MinBWs.find(E); Type *OrigScalarTy = ScalarTy; - if (It != MinBWs.end()) + if (It != MinBWs.end()) { + auto VecTy = dyn_cast(ScalarTy); ScalarTy = IntegerType::get(F->getContext(), It->second.first); + if (VecTy) + ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements()); + } auto *VecTy = getWidenedType(ScalarTy, VL.size()); unsigned EntryVF = E->getVectorFactor(); auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF); @@ -12586,8 +12590,15 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx, } if (IsSameVE) { auto FinalShuffle = [&](Value *V, ArrayRef Mask) { + // V may be affected by MinBWs. + // We want ShuffleInstructionBuilder to correctly support REVEC. The key + // factor is the number of elements, not their type. + Type *ScalarTy = cast(V->getType())->getElementType(); + unsigned NumElements = getNumElements(VL.front()->getType()); ShuffleInstructionBuilder ShuffleBuilder( - cast(V->getType())->getElementType(), Builder, *this); + NumElements != 1 ? 
FixedVectorType::get(ScalarTy, NumElements) + : ScalarTy, + Builder, *this); ShuffleBuilder.add(V, Mask); return ShuffleBuilder.finalize(std::nullopt); }; @@ -13120,8 +13131,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { else if (auto *IE = dyn_cast(V)) ScalarTy = IE->getOperand(1)->getType(); auto It = MinBWs.find(E); - if (It != MinBWs.end()) + if (It != MinBWs.end()) { + auto VecTy = dyn_cast(ScalarTy); ScalarTy = IntegerType::get(F->getContext(), It->second.first); + if (VecTy) + ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements()); + } auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size()); if (E->isGather()) { // Set insert point for non-reduction initial nodes. @@ -15564,7 +15579,8 @@ bool BoUpSLP::collectValuesToDemote( if (all_of(E.Scalars, IsaPred)) return true; - unsigned OrigBitWidth = DL->getTypeSizeInBits(E.Scalars.front()->getType()); + unsigned OrigBitWidth = + DL->getTypeSizeInBits(E.Scalars.front()->getType()->getScalarType()); if (OrigBitWidth == BitWidth) { MaxDepthLevel = 1; return true; @@ -15995,7 +16011,9 @@ void BoUpSLP::computeMinimumValueSizes() { } unsigned VF = E.getVectorFactor(); - auto *TreeRootIT = dyn_cast(E.Scalars.front()->getType()); + Type *ScalarTy = E.Scalars.front()->getType(); + unsigned ScalarTyNumElements = getNumElements(ScalarTy); + auto *TreeRootIT = dyn_cast(ScalarTy->getScalarType()); if (!TreeRootIT || !Opcode) return 0u; @@ -16003,7 +16021,8 @@ void BoUpSLP::computeMinimumValueSizes() { [&](Value *V) { return AnalyzedMinBWVals.contains(V); })) return 0u; - unsigned NumParts = TTI->getNumberOfParts(getWidenedType(TreeRootIT, VF)); + unsigned NumParts = TTI->getNumberOfParts( + getWidenedType(TreeRootIT, VF * ScalarTyNumElements)); // The maximum bit width required to represent all the values that can be // demoted without loss of precision. 
It would be safe to truncate the roots @@ -16025,7 +16044,8 @@ void BoUpSLP::computeMinimumValueSizes() { // we can truncate the roots to this narrower type. for (Value *Root : E.Scalars) { unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT); - TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType()); + TypeSize NumTypeBits = + DL->getTypeSizeInBits(Root->getType()->getScalarType()); unsigned BitWidth1 = NumTypeBits - NumSignBits; // If we can't prove that the sign bit is zero, we must add one to the // maximum bit width to account for the unknown sign bit. This preserves @@ -16145,7 +16165,8 @@ void BoUpSLP::computeMinimumValueSizes() { for (unsigned Idx : RootDemotes) { if (all_of(VectorizableTree[Idx]->Scalars, [&](Value *V) { - uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); + uint32_t OrigBitWidth = + DL->getTypeSizeInBits(V->getType()->getScalarType()); if (OrigBitWidth > MaxBitWidth) { APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, MaxBitWidth); return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)); @@ -16196,7 +16217,8 @@ void BoUpSLP::computeMinimumValueSizes() { // type, we can proceed with the narrowing. Otherwise, do nothing. if (MaxBitWidth == 0 || MaxBitWidth >= - cast(TreeRoot.front()->getType())->getBitWidth()) { + cast(TreeRoot.front()->getType()->getScalarType()) + ->getBitWidth()) { if (UserIgnoreList) AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end()); continue; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1a93f275a39f5..911b2fe9e9a1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1652,24 +1652,25 @@ void VPBlendRecipe::execute(VPTransformState &State) { // In0))) // Note that Mask0 is never used: lanes for which no path reaches this phi and // are essentially undef are taken from In0. 
- VectorParts Entry(State.UF); - bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); - for (unsigned In = 0; In < NumIncoming; ++In) { - for (unsigned Part = 0; Part < State.UF; ++Part) { - // We might have single edge PHIs (blocks) - use an identity - // 'select' for the first PHI operand. - Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed); - if (In == 0) - Entry[Part] = In0; // Initialize with the first incoming value. - else { - // Select between the current value and the previous incoming edge - // based on the incoming mask. - Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed); - Entry[Part] = - State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); - } - } - } + VectorParts Entry(State.UF); + bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); + for (unsigned In = 0; In < NumIncoming; ++In) { + for (unsigned Part = 0; Part < State.UF; ++Part) { + // We might have single edge PHIs (blocks) - use an identity + // 'select' for the first PHI operand. + Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed); + if (In == 0) + Entry[Part] = In0; // Initialize with the first incoming value. + else { + // Select between the current value and the previous incoming edge + // based on the incoming mask. 
+ Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed); + Entry[Part] = + State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); + } + } + } + for (unsigned Part = 0; Part < State.UF; ++Part) State.set(this, Entry[Part], Part, OnlyFirstLaneUsed); } diff --git a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll index 25b169e08a723..a485bad2a477e 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll @@ -39,13 +39,9 @@ define i64 @var_cttz_i64u(i64 %a) { } define i32 @var_cttz_i32(i32 %a) { -; NOBMI-LABEL: 'var_cttz_i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz -; -; BMI-LABEL: 'var_cttz_i32' -; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz +; CHECK-LABEL: 'var_cttz_i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz ; %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0) ret i32 %cttz diff --git a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll index a8abba56ba49f..066e3232612f2 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll @@ -39,13 +39,9 @@ define i64 @var_cttz_i64u(i64 %a) { } define i32 @var_cttz_i32(i32 %a) { -; NOBMI-LABEL: 'var_cttz_i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %cttz -; -; BMI-LABEL: 'var_cttz_i32' -; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz +; CHECK-LABEL: 'var_cttz_i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz ; %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0) ret i32 %cttz diff --git a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll index 294f101f571bf..cc71bb5c90883 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll @@ -39,13 +39,9 @@ define i64 @var_cttz_i64u(i64 %a) { } define i32 @var_cttz_i32(i32 %a) { -; NOBMI-LABEL: 'var_cttz_i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz -; -; BMI-LABEL: 'var_cttz_i32' -; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; BMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz +; CHECK-LABEL: 'var_cttz_i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %cttz ; %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0) ret i32 %cttz diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index 88a22f98feb25..1de3e2a853dd8 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ 
b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -237,17 +237,17 @@ define void @cttz(i32 %a, <16 x i32> %va) { ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'cttz' -; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'cttz' -; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) ; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'cttz' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll index 4d5f08d41b9ce..359e22fa41bac 100644 --- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll +++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -1,3 +1,4 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s ; This test set ensures that we can correctly operate with recurrencies from @@ -7,28 +8,62 @@ ; order. define void @test_00() { - -; CHECK-LABEL: Classifying expressions for: @test_00 -; CHECK: %sum1 = add i32 %phi1, %phi2 -; CHECK-NEXT: --> {14,+,3}<%loop1> -; CHECK: %sum2 = add i32 %sum1, %phi3 -; CHECK-NEXT: --> {20,+,6}<%loop1> -; CHECK: %sum3 = add i32 %phi4, %phi5 -; CHECK-NEXT: --> {116,+,3}<%loop2> -; CHECK: %sum4 = add i32 %sum3, %phi6 -; CHECK-NEXT: --> {159,+,6}<%loop2> -; CHECK: %s1 = add i32 %phi1, %phi4 -; CHECK-NEXT: --> {{\{\{}}73,+,1}<%loop1>,+,1}<%loop2> -; CHECK: %s2 = add i32 %phi5, %phi2 -; CHECK-NEXT: --> {{\{\{}}57,+,2}<%loop1>,+,2}<%loop2> -; CHECK: %s3 = add i32 %sum1, %sum3 -; CHECK-NEXT: --> {{\{\{}}130,+,3}<%loop1>,+,3}<%loop2> -; CHECK: %s4 = add i32 %sum4, %sum2 -; CHECK-NEXT: --> {{\{\{}}179,+,6}<%loop1>,+,6}<%loop2> -; CHECK: %s5 = add i32 %phi3, %sum3 -; CHECK-NEXT: --> {{\{\{}}122,+,3}<%loop1>,+,3}<%loop2> -; CHECK: %s6 = add i32 %sum2, %phi6 -; CHECK-NEXT: --> {{\{\{}}63,+,6}<%loop1>,+,3}<%loop2> +; CHECK-LABEL: 'test_00' +; CHECK-NEXT: Classifying expressions for: @test_00 +; CHECK-NEXT: %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1 ] +; CHECK-NEXT: --> {10,+,1}<%loop1> U: [10,175) S: [10,175) Exits: 174 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi2 = phi i32 [ 4, %entry ], [ %phi2.inc, %loop1 ] +; CHECK-NEXT: --> {4,+,2}<%loop1> U: [4,333) S: [4,333) Exits: 332 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ] +; CHECK-NEXT: --> {6,+,3}<%loop1> U: [6,499) S: [6,499) Exits: 498 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi1.inc = add i32 %phi1, 1 +; CHECK-NEXT: --> {11,+,1}<%loop1> U: [11,176) S: [11,176) Exits: 175 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: 
%phi2.inc = add i32 %phi2, 2 +; CHECK-NEXT: --> {6,+,2}<%loop1> U: [6,335) S: [6,335) Exits: 334 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi3.inc = add i32 %phi3, 3 +; CHECK-NEXT: --> {9,+,3}<%loop1> U: [9,502) S: [9,502) Exits: 501 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %sum1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {14,+,3}<%loop1> U: [14,507) S: [14,507) Exits: 506 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %sum2 = add i32 %sum1, %phi3 +; CHECK-NEXT: --> {20,+,6}<%loop1> U: [20,1005) S: [20,1005) Exits: 1004 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ] +; CHECK-NEXT: --> {63,+,1}<%loop2> U: [63,205) S: [63,205) Exits: 204 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ] +; CHECK-NEXT: --> {53,+,2}<%loop2> U: [53,336) S: [53,336) Exits: 335 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ] +; CHECK-NEXT: --> {43,+,3}<%loop2> U: [43,467) S: [43,467) Exits: 466 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi4.inc = add i32 %phi4, 1 +; CHECK-NEXT: --> {64,+,1}<%loop2> U: [64,206) S: [64,206) Exits: 205 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi5.inc = add i32 %phi5, 2 +; CHECK-NEXT: --> {55,+,2}<%loop2> U: [55,338) S: [55,338) Exits: 337 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi6.inc = add i32 %phi6, 3 +; CHECK-NEXT: --> {46,+,3}<%loop2> U: [46,470) S: [46,470) Exits: 469 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %sum3 = add i32 %phi4, %phi5 +; CHECK-NEXT: --> {116,+,3}<%loop2> U: [116,540) S: [116,540) Exits: 539 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %sum4 = add i32 %sum3, %phi6 +; CHECK-NEXT: --> {159,+,6}<%loop2> U: [159,1006) S: [159,1006) Exits: 1005 LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %s1 = add i32 %phi1, %phi4 +; CHECK-NEXT: --> 
{{\{\{}}73,+,1}<%loop1>,+,1}<%loop2> U: [73,379) S: [73,379) --> 378 U: [378,379) S: [378,379) +; CHECK-NEXT: %s2 = add i32 %phi5, %phi2 +; CHECK-NEXT: --> {{\{\{}}57,+,2}<%loop1>,+,2}<%loop2> U: [57,668) S: [57,668) --> 667 U: [667,668) S: [667,668) +; CHECK-NEXT: %s3 = add i32 %sum1, %sum3 +; CHECK-NEXT: --> {{\{\{}}130,+,3}<%loop1>,+,3}<%loop2> U: [130,1046) S: [130,1046) --> 1045 U: [1045,1046) S: [1045,1046) +; CHECK-NEXT: %s4 = add i32 %sum4, %sum2 +; CHECK-NEXT: --> {{\{\{}}179,+,6}<%loop1>,+,6}<%loop2> U: [179,2010) S: [179,2010) --> 2009 U: [2009,2010) S: [2009,2010) +; CHECK-NEXT: %s5 = add i32 %phi3, %sum3 +; CHECK-NEXT: --> {{\{\{}}122,+,3}<%loop1>,+,3}<%loop2> U: [122,1038) S: [122,1038) --> 1037 U: [1037,1038) S: [1037,1038) +; CHECK-NEXT: %s6 = add i32 %sum2, %phi6 +; CHECK-NEXT: --> {{\{\{}}63,+,6}<%loop1>,+,3}<%loop2> U: [63,1471) S: [63,1471) --> 1470 U: [1470,1471) S: [1470,1471) +; CHECK-NEXT: Determining loop execution counts for: @test_00 +; CHECK-NEXT: Loop %loop2: backedge-taken count is i32 141 +; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 141 +; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is i32 141 +; CHECK-NEXT: Loop %loop2: Trip multiple is 142 +; CHECK-NEXT: Loop %loop1: backedge-taken count is i32 164 +; CHECK-NEXT: Loop %loop1: constant max backedge-taken count is i32 164 +; CHECK-NEXT: Loop %loop1: symbolic max backedge-taken count is i32 164 +; CHECK-NEXT: Loop %loop1: Trip multiple is 165 +; entry: br label %loop1 @@ -71,34 +106,68 @@ exit: ; in any order. 
define void @test_01(i32 %a, i32 %b) { - -; CHECK-LABEL: Classifying expressions for: @test_01 -; CHECK: %sum1 = add i32 %phi1, %phi2 -; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1> -; CHECK: %sum2 = add i32 %sum1, %phi3 -; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1> -; CHECK: %is1 = add i32 %sum2, %a -; CHECK-NEXT: --> {(6 + (2 * %a) + %b),+,6}<%loop1> -; CHECK: %sum3 = add i32 %phi4, %phi5 -; CHECK-NEXT: --> {116,+,3}<%loop2> -; CHECK: %sum4 = add i32 %sum3, %phi6 -; CHECK-NEXT: --> {159,+,6}<%loop2> -; CHECK: %is2 = add i32 %sum4, %b -; CHECK-NEXT: --> {(159 + %b),+,6}<%loop2> -; CHECK: %ec2 = add i32 %is1, %is2 -; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> -; CHECK: %s1 = add i32 %phi1, %is1 -; CHECK-NEXT: --> {(6 + (3 * %a) + %b),+,7}<%loop1> -; CHECK: %s2 = add i32 %is2, %phi4 -; CHECK-NEXT: --> {(222 + %b),+,7}<%loop2> -; CHECK: %s3 = add i32 %is1, %phi5 -; CHECK-NEXT: --> {{{{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2> -; CHECK: %s4 = add i32 %phi2, %is2 -; CHECK-NEXT: --> {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2> -; CHECK: %s5 = add i32 %is1, %is2 -; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> -; CHECK: %s6 = add i32 %is2, %is1 -; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> +; CHECK-LABEL: 'test_01' +; CHECK-NEXT: Classifying expressions for: @test_01 +; CHECK-NEXT: %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ] +; CHECK-NEXT: --> {%a,+,1}<%loop1> U: full-set S: full-set Exits: (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))) + %a) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ] +; CHECK-NEXT: --> {%b,+,2}<%loop1> U: full-set S: full-set Exits: ((2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + 
(1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ] +; CHECK-NEXT: --> {6,+,3}<%loop1> U: [6,508) S: [6,508) Exits: (6 + (3 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi1.inc = add i32 %phi1, 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop1> U: full-set S: full-set Exits: (1 + ((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))) + %a) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi2.inc = add i32 %phi2, 2 +; CHECK-NEXT: --> {(2 + %b),+,2}<%loop1> U: full-set S: full-set Exits: (2 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi3.inc = add i32 %phi3, 3 +; CHECK-NEXT: --> {9,+,3}<%loop1> U: [9,511) S: [9,511) Exits: (9 + (3 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %sum1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1> U: full-set S: full-set Exits: ((3 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + 
(1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %a + %b) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %sum2 = add i32 %sum1, %phi3 +; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1> U: full-set S: full-set Exits: (6 + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %a + %b) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %is1 = add i32 %sum2, %a +; CHECK-NEXT: --> {(6 + (2 * %a) + %b),+,6}<%loop1> U: full-set S: full-set Exits: (6 + (2 * %a) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ] +; CHECK-NEXT: --> {63,+,1}<%loop2> U: [63,231) S: [63,231) Exits: (63 + ((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * 
%a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ] +; CHECK-NEXT: --> {53,+,2}<%loop2> U: [53,388) S: [53,388) Exits: (53 + (2 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax 
(6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ] +; CHECK-NEXT: --> {43,+,3}<%loop2> U: [43,545) S: [43,545) Exits: (43 + (3 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * 
%a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi4.inc = add i32 %phi4, 1 +; CHECK-NEXT: --> {64,+,1}<%loop2> U: [64,232) S: [64,232) Exits: (64 + ((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 
umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi5.inc = add i32 %phi5, 2 +; CHECK-NEXT: --> {55,+,2}<%loop2> U: [55,390) S: [55,390) Exits: (55 + (2 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 
* %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi6.inc = add i32 %phi6, 3 +; CHECK-NEXT: --> {46,+,3}<%loop2> U: [46,548) S: [46,548) Exits: (46 + (3 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) 
+ (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %sum3 = add i32 %phi4, %phi5 +; CHECK-NEXT: --> {116,+,3}<%loop2> U: [116,618) S: [116,618) Exits: (116 + (3 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %sum4 = add i32 
%sum3, %phi6 +; CHECK-NEXT: --> {159,+,6}<%loop2> U: [159,1162) S: [159,1162) Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %is2 = add i32 %sum4, %b +; CHECK-NEXT: --> {(159 + %b),+,6}<%loop2> U: full-set S: full-set Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * 
%a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %ec2 = add i32 %is1, %is2 +; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> {(165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 
umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))),+,6}<%loop2> U: full-set S: full-set Exits: (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) 
+ (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %s1 = add i32 %phi1, %is1 +; CHECK-NEXT: --> {(6 + (3 * %a) + %b),+,7}<%loop1> U: full-set S: full-set --> (6 + (3 * %a) + (7 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set +; CHECK-NEXT: %s2 = add i32 %is2, %phi4 +; CHECK-NEXT: --> {(222 + %b),+,7}<%loop2> U: full-set S: full-set --> (222 + (7 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * 
%a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) U: full-set S: full-set +; CHECK-NEXT: %s3 = add i32 %is1, %phi5 +; CHECK-NEXT: --> {{\{\{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2> U: full-set S: full-set --> (59 + (2 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + 
(2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + (2 * %a) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set +; CHECK-NEXT: %s4 = add i32 %phi2, %is2 +; CHECK-NEXT: --> {{\{\{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (159 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (2 * %b) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin 
(-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set +; CHECK-NEXT: %s5 = add i32 %is1, %is2 +; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + 
(-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set +; CHECK-NEXT: %s6 = add i32 %is2, %is1 +; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + 
(1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @test_01 +; CHECK-NEXT: Loop %loop2: backedge-taken count is (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 
+ (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) +; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 167 +; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 
* %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) +; CHECK-NEXT: Loop %loop2: Trip multiple is 1 +; CHECK-NEXT: Loop %loop1: backedge-taken count is (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) +; CHECK-NEXT: Loop %loop1: constant max backedge-taken count is i32 167 +; CHECK-NEXT: Loop %loop1: symbolic max backedge-taken count is (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) +; CHECK-NEXT: Loop %loop1: Trip multiple is 1 +; entry: br label %loop1 @@ -144,36 +213,72 @@ exit: ; loops in any order. 
define void @test_02(i32 %a, i32 %b, ptr %p) { - -; CHECK-LABEL: Classifying expressions for: @test_02 -; CHECK: %sum1 = add i32 %phi1, %phi2 -; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1> -; CHECK: %sum2 = add i32 %sum1, %phi3 -; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1> -; CHECK: %is1 = add i32 %sum2, %v1 -; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1) -; CHECK: %sum3 = add i32 %phi4, %phi5 -; CHECK-NEXT: --> {(%a + %b),+,3}<%loop2> -; CHECK: %sum4 = add i32 %sum3, %phi6 -; CHECK-NEXT: --> {(43 + %a + %b),+,6}<%loop2> -; CHECK: %is2 = add i32 %sum4, %v2 -; CHECK-NEXT: --> ({(43 + %a + %b),+,6}<%loop2> + %v2) -; CHECK: %is3 = add i32 %v1, %sum2 -; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1) -; CHECK: %ec2 = add i32 %is1, %is3 -; CHECK-NEXT: --> (2 * ({(6 + %a + %b),+,6}<%loop1> + %v1)) -; CHECK: %s1 = add i32 %phi1, %is1 -; CHECK-NEXT: --> ({(6 + (2 * %a) + %b),+,7}<%loop1> + %v1) -; CHECK: %s2 = add i32 %is2, %phi4 -; CHECK-NEXT: --> ({(43 + (2 * %a) + %b),+,7}<%loop2> + %v2) -; CHECK: %s3 = add i32 %is1, %phi5 -; CHECK-NEXT: --> {({(6 + (2 * %b) + %a),+,6}<%loop1> + %v1),+,2}<%loop2> -; CHECK: %s4 = add i32 %phi2, %is2 -; CHECK-NEXT: --> ({{{{}}(43 + (2 * %b) + %a),+,2}<%loop1>,+,6}<%loop2> + %v2) -; CHECK: %s5 = add i32 %is1, %is2 -; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2) -; CHECK: %s6 = add i32 %is2, %is1 -; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2) +; CHECK-LABEL: 'test_02' +; CHECK-NEXT: Classifying expressions for: @test_02 +; CHECK-NEXT: %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ] +; CHECK-NEXT: --> {%a,+,1}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ] +; CHECK-NEXT: --> {%b,+,2}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ] +; 
CHECK-NEXT: --> {6,+,3}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi1.inc = add i32 %phi1, 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi2.inc = add i32 %phi2, 2 +; CHECK-NEXT: --> {(2 + %b),+,2}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi3.inc = add i32 %phi3, 3 +; CHECK-NEXT: --> {9,+,3}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %v1 = load i32, ptr %p, align 4 +; CHECK-NEXT: --> %v1 U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Variant } +; CHECK-NEXT: %sum1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %sum2 = add i32 %sum1, %phi3 +; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %is1 = add i32 %sum2, %v1 +; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Variant } +; CHECK-NEXT: %phi4 = phi i32 [ %a, %loop1 ], [ %phi4.inc, %loop2 ] +; CHECK-NEXT: --> {%a,+,1}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi5 = phi i32 [ %b, %loop1 ], [ %phi5.inc, %loop2 ] +; CHECK-NEXT: --> {%b,+,2}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ] +; CHECK-NEXT: --> {43,+,3}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi4.inc = add i32 %phi4, 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi5.inc = add i32 %phi5, 2 +; CHECK-NEXT: --> {(2 + 
%b),+,2}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi6.inc = add i32 %phi6, 3 +; CHECK-NEXT: --> {46,+,3}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %v2 = load i32, ptr %p, align 4 +; CHECK-NEXT: --> %v2 U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %sum3 = add i32 %phi4, %phi5 +; CHECK-NEXT: --> {(%a + %b),+,3}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %sum4 = add i32 %sum3, %phi6 +; CHECK-NEXT: --> {(43 + %a + %b),+,6}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %is2 = add i32 %sum4, %v2 +; CHECK-NEXT: --> ({(43 + %a + %b),+,6}<%loop2> + %v2) U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %is3 = add i32 %v1, %sum2 +; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1) U: full-set S: full-set Exits: ({(6 + %a + %b),+,6}<%loop1> + %v1) LoopDispositions: { %loop2: Invariant } +; CHECK-NEXT: %ec2 = add i32 %is1, %is3 +; CHECK-NEXT: --> (2 * ({(6 + %a + %b),+,6}<%loop1> + %v1)) U: [0,-1) S: [-2147483648,2147483647) Exits: (2 * ({(6 + %a + %b),+,6}<%loop1> + %v1)) LoopDispositions: { %loop2: Invariant } +; CHECK-NEXT: %s1 = add i32 %phi1, %is1 +; CHECK-NEXT: --> ({(6 + (2 * %a) + %b),+,7}<%loop1> + %v1) U: full-set S: full-set +; CHECK-NEXT: %s2 = add i32 %is2, %phi4 +; CHECK-NEXT: --> ({(43 + (2 * %a) + %b),+,7}<%loop2> + %v2) U: full-set S: full-set +; CHECK-NEXT: %s3 = add i32 %is1, %phi5 +; CHECK-NEXT: --> {({(6 + (2 * %b) + %a),+,6}<%loop1> + %v1),+,2}<%loop2> U: full-set S: full-set +; CHECK-NEXT: %s4 = add i32 %phi2, %is2 +; CHECK-NEXT: --> ({{\{\{}}(43 + (2 * %b) + %a),+,2}<%loop1>,+,6}<%loop2> + %v2) U: full-set S: full-set +; CHECK-NEXT: %s5 = add i32 %is1, %is2 +; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2) U: 
full-set S: full-set +; CHECK-NEXT: %s6 = add i32 %is2, %is1 +; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @test_02 +; CHECK-NEXT: Loop %loop2: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop2: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop2: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %loop1: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop1: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop1: Unpredictable symbolic max backedge-taken count. +; entry: br label %loop1 @@ -224,16 +329,33 @@ exit: ; because we cannot prove for sure that it doesn't use Phis of loop 2. define void @test_03(i32 %a, i32 %b, i32 %c, ptr %p) { - -; CHECK-LABEL: Classifying expressions for: @test_03 -; CHECK: %v1 = load i32, ptr %p -; CHECK-NEXT: --> %v1 -; CHECK: %s1 = add i32 %phi1, %v1 -; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1) -; CHECK: %s2 = add i32 %s1, %b -; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1) -; CHECK: %s3 = add i32 %s2, %phi2 -; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1) +; CHECK-LABEL: 'test_03' +; CHECK-NEXT: Classifying expressions for: @test_03 +; CHECK-NEXT: %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ] +; CHECK-NEXT: --> {%a,+,1}<%loop1> U: full-set S: full-set Exits: (%a umax %c) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi1.inc = add i32 %phi1, 1 +; CHECK-NEXT: --> {(1 + %a),+,1}<%loop1> U: full-set S: full-set Exits: (1 + (%a umax %c)) LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %phi2 = phi i32 [ %a, %loop1 ], [ %phi2.inc, %loop2 ] +; CHECK-NEXT: --> {%a,+,2}<%loop2> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %phi2.inc = add i32 %phi2, 2 +; CHECK-NEXT: --> {(2 + %a),+,2}<%loop2> U: full-set S: full-set Exits: <> 
LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %v1 = load i32, ptr %p, align 4 +; CHECK-NEXT: --> %v1 U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %s1 = add i32 %phi1, %v1 +; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1) U: full-set S: full-set --> ((%a umax %c) + %v1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %s2 = add i32 %s1, %b +; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1) U: full-set S: full-set --> ((%a umax %c) + %b + %v1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %s3 = add i32 %s2, %phi2 +; CHECK-NEXT: --> ({{\{\{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1) U: full-set S: full-set --> ({((%a umax %c) + %a + %b),+,2}<%loop2> + %v1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: Determining loop execution counts for: @test_03 +; CHECK-NEXT: Loop %loop2: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop2: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop2: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %loop1: backedge-taken count is ((-1 * %a) + (%a umax %c)) +; CHECK-NEXT: Loop %loop1: constant max backedge-taken count is i32 -1 +; CHECK-NEXT: Loop %loop1: symbolic max backedge-taken count is ((-1 * %a) + (%a umax %c)) +; CHECK-NEXT: Loop %loop1: Trip multiple is 1 +; entry: br label %loop1 @@ -262,29 +384,40 @@ exit: ; Another mix of previous use cases that demonstrates that incorrect picking of ; a loop for a recurrence may cause a crash of SCEV analysis. 
define void @test_04() { - -; CHECK-LABEL: Classifying expressions for: @test_04 -; CHECK: %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ] -; CHECK-NEXT: --> {2,+,1}<%loop1> -; CHECK: %tmp2 = trunc i64 %tmp to i32 -; CHECK-NEXT: --> {2,+,1}<%loop1> -; CHECK: %tmp4 = add nuw nsw i64 %tmp, 1 -; CHECK-NEXT: --> {3,+,1}<%loop1> -; CHECK: %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ] -; CHECK-NEXT: --> {2,+,1}<%loop2> U: [2,9223372036854775807) S: [2,9223372036854775807) -; CHECK: %tmp9 = sext i8 %tmp8 to i64 -; CHECK-NEXT: --> (sext i8 %tmp8 to i64) U: [-128,128) S: [-128,128) -; CHECK: %tmp10 = sub i64 %tmp9, %tmp7 -; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {-2,+,-1}<%loop2>) U: [9223372036854775682,126) S: [9223372036854775682,126) -; CHECK: %tmp11 = add i64 %tmp10, undef -; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {(-2 + undef),+,-1}<%loop2>) -; CHECK: %tmp13 = trunc i64 %tmp11 to i32 -; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {(-2 + (trunc i64 undef to i32)),+,-1}<%loop2>) -; CHECK: %tmp14 = sub i32 %tmp13, %tmp2 +; CHECK-LABEL: 'test_04' +; CHECK-NEXT: Classifying expressions for: @test_04 +; CHECK-NEXT: %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ] +; CHECK-NEXT: --> {2,+,1}<%loop1> U: [2,-9223372036854775808) S: [2,-9223372036854775808) Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %tmp2 = trunc i64 %tmp to i32 +; CHECK-NEXT: --> {2,+,1}<%loop1> U: full-set S: full-set Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %tmp4 = add nuw nsw i64 %tmp, 1 +; CHECK-NEXT: --> {3,+,1}<%loop1> U: [3,0) S: [3,0) Exits: <> LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ] +; CHECK-NEXT: --> {2,+,1}<%loop2> U: [2,9223372036854775807) S: [2,9223372036854775807) Exits: (-1 + (3 smax {2,+,1}<%loop1>)) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %tmp8 = load i8, ptr addrspace(1) undef, align 1 +; CHECK-NEXT: --> %tmp8 U: full-set S: full-set Exits: <> LoopDispositions: { 
%loop2: Variant } +; CHECK-NEXT: %tmp9 = sext i8 %tmp8 to i64 +; CHECK-NEXT: --> (sext i8 %tmp8 to i64) U: [-128,128) S: [-128,128) Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %tmp10 = sub i64 %tmp9, %tmp7 +; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {-2,+,-1}<%loop2>) U: [9223372036854775682,126) S: [9223372036854775682,126) Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %tmp11 = add i64 %tmp10, undef +; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {(-2 + undef),+,-1}<%loop2>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %tmp13 = trunc i64 %tmp11 to i32 +; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {(-2 + (trunc i64 undef to i32)),+,-1}<%loop2>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %tmp14 = sub i32 %tmp13, %tmp2 +; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {{\{\{}}(-4 + (trunc i64 undef to i32)),+,-1}<%loop1>,+,-1}<%loop2>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop2: Variant } +; CHECK-NEXT: %tmp15 = add nuw nsw i64 %tmp7, 1 +; CHECK-NEXT: --> {3,+,1}<%loop2> U: [3,-9223372036854775808) S: [3,-9223372036854775808) Exits: (3 smax {2,+,1}<%loop1>) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_04 +; CHECK-NEXT: Loop %loop2: backedge-taken count is (-3 + (3 smax {2,+,1}<%loop1>)) +; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i64 9223372036854775804 +; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is (-3 + (3 smax {2,+,1}<%loop1>)) +; CHECK-NEXT: Loop %loop2: Trip multiple is 1 +; CHECK-NEXT: Loop %loop1: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop1: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop1: Unpredictable symbolic max backedge-taken count. 
+; ; `{{[{][{]}}` is the ugliness needed to match `{{` -; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {{[{][{]}}(-4 + (trunc i64 undef to i32)),+,-1}<%loop1>,+,-1}<%loop2>) -; CHECK: %tmp15 = add nuw nsw i64 %tmp7, 1 -; CHECK-NEXT: --> {3,+,1}<%loop2> bb: br label %loop1 @@ -319,14 +452,28 @@ loop2: ; Demonstrate a situation when we can add two recs with different degrees from ; the same loop. define void @test_05(i32 %N) { - -; CHECK-LABEL: Classifying expressions for: @test_05 -; CHECK: %SQ = mul i32 %i.0, %i.0 -; CHECK-NEXT: --> {4,+,5,+,2}<%bb3> -; CHECK: %tmp4 = mul i32 %i.0, 2 -; CHECK-NEXT: --> {4,+,2}<%bb3> -; CHECK: %tmp5 = sub i32 %SQ, %tmp4 -; CHECK-NEXT: --> {0,+,3,+,2}<%bb3> +; CHECK-LABEL: 'test_05' +; CHECK-NEXT: Classifying expressions for: @test_05 +; CHECK-NEXT: %"alloca point" = bitcast i32 0 to i32 +; CHECK-NEXT: --> 0 U: [0,1) S: [0,1) +; CHECK-NEXT: %tmp = getelementptr [1000 x i32], ptr @A, i32 0, i32 %i.0 +; CHECK-NEXT: --> {(8 + @A),+,4}<%bb3> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (408 + @A) LoopDispositions: { %bb3: Computable } +; CHECK-NEXT: %tmp2 = add i32 %i.0, 1 +; CHECK-NEXT: --> {3,+,1}<%bb3> U: [3,104) S: [3,104) Exits: 103 LoopDispositions: { %bb3: Computable } +; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %tmp2, %bb ] +; CHECK-NEXT: --> {2,+,1}<%bb3> U: [2,103) S: [2,103) Exits: 102 LoopDispositions: { %bb3: Computable } +; CHECK-NEXT: %SQ = mul i32 %i.0, %i.0 +; CHECK-NEXT: --> {4,+,5,+,2}<%bb3> U: full-set S: full-set Exits: 10404 LoopDispositions: { %bb3: Computable } +; CHECK-NEXT: %tmp4 = mul i32 %i.0, 2 +; CHECK-NEXT: --> {4,+,2}<%bb3> U: [4,205) S: [4,205) Exits: 204 LoopDispositions: { %bb3: Computable } +; CHECK-NEXT: %tmp5 = sub i32 %SQ, %tmp4 +; CHECK-NEXT: --> {0,+,3,+,2}<%bb3> U: full-set S: full-set Exits: 10200 LoopDispositions: { %bb3: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_05 +; CHECK-NEXT: Loop %bb3: backedge-taken count is i32 100 +; CHECK-NEXT: Loop 
%bb3: constant max backedge-taken count is i32 100 +; CHECK-NEXT: Loop %bb3: symbolic max backedge-taken count is i32 100 +; CHECK-NEXT: Loop %bb3: Trip multiple is 101 +; entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -356,20 +503,46 @@ return: ; preds = %bb5 ; Check that we can add Phis from different loops with different nesting, nested ; loop comes first. define void @test_06() { - -; CHECK-LABEL: Classifying expressions for: @test_06 -; CHECK: %s1 = add i32 %phi1, %phi2 -; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> -; CHECK: %s2 = add i32 %phi2, %phi1 -; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> -; CHECK: %s3 = add i32 %phi1, %phi3 -; CHECK-NEXT: --> {{\{\{}}40,+,1}<%loop1>,+,3}<%loop3> -; CHECK: %s4 = add i32 %phi3, %phi1 -; CHECK-NEXT: --> {{\{\{}}40,+,1}<%loop1>,+,3}<%loop3> -; CHECK: %s5 = add i32 %phi2, %phi3 -; CHECK-NEXT: --> {{\{\{}}50,+,2}<%loop2>,+,3}<%loop3> -; CHECK: %s6 = add i32 %phi3, %phi2 -; CHECK-NEXT: --> {{\{\{}}50,+,2}<%loop2>,+,3}<%loop3> +; CHECK-LABEL: 'test_06' +; CHECK-NEXT: Classifying expressions for: @test_06 +; CHECK-NEXT: %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1.exit ] +; CHECK-NEXT: --> {10,+,1}<%loop1> U: [10,1000) S: [10,1000) Exits: 999 LoopDispositions: { %loop1: Computable, %loop2: Invariant } +; CHECK-NEXT: %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ] +; CHECK-NEXT: --> {20,+,2}<%loop2> U: [20,999) S: [20,999) Exits: 998 LoopDispositions: { %loop2: Computable, %loop1: Variant } +; CHECK-NEXT: %phi2.inc = add i32 %phi2, 2 +; CHECK-NEXT: --> {22,+,2}<%loop2> U: [22,1001) S: [22,1001) Exits: 1000 LoopDispositions: { %loop2: Computable, %loop1: Variant } +; CHECK-NEXT: %phi1.inc = add i32 %phi1, 1 +; CHECK-NEXT: --> {11,+,1}<%loop1> U: [11,1001) S: [11,1001) Exits: 1000 LoopDispositions: { %loop1: Computable, %loop2: Invariant } +; CHECK-NEXT: %phi3 = phi i32 [ 30, %loop1.exit ], [ %phi3.inc, %loop3 ] +; CHECK-NEXT: --> {30,+,3}<%loop3> U: [30,1000) S: [30,1000) Exits: 999 
LoopDispositions: { %loop3: Computable } +; CHECK-NEXT: %phi3.inc = add i32 %phi3, 3 +; CHECK-NEXT: --> {33,+,3}<%loop3> U: [33,1003) S: [33,1003) Exits: 1002 LoopDispositions: { %loop3: Computable } +; CHECK-NEXT: %s1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: %s2 = add i32 %phi2, %phi1 +; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: %s3 = add i32 %phi1, %phi3 +; CHECK-NEXT: --> {{\{\{}}40,+,1}<%loop1>,+,3}<%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999) +; CHECK-NEXT: %s4 = add i32 %phi3, %phi1 +; CHECK-NEXT: --> {{\{\{}}40,+,1}<%loop1>,+,3}<%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999) +; CHECK-NEXT: %s5 = add i32 %phi2, %phi3 +; CHECK-NEXT: --> {{\{\{}}50,+,2}<%loop2>,+,3}<%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: %s6 = add i32 %phi3, %phi2 +; CHECK-NEXT: --> {{\{\{}}50,+,2}<%loop2>,+,3}<%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: Determining loop execution counts for: @test_06 +; CHECK-NEXT: Loop %loop3: backedge-taken count is i32 323 +; CHECK-NEXT: Loop %loop3: constant max backedge-taken count is i32 323 +; CHECK-NEXT: Loop %loop3: symbolic max backedge-taken count is i32 323 +; CHECK-NEXT: Loop %loop3: Trip multiple is 324 +; CHECK-NEXT: Loop %loop2: backedge-taken count is i32 489 +; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 489 +; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is i32 489 +; CHECK-NEXT: Loop %loop2: Trip multiple is 490 +; CHECK-NEXT: Loop %loop1: backedge-taken count is i32 989 +; CHECK-NEXT: Loop %loop1: constant max backedge-taken count is i32 989 +; CHECK-NEXT: Loop %loop1: symbolic max backedge-taken count is i32 989 +; CHECK-NEXT: Loop %loop1: Trip multiple 
is 990 +; entry: br label %loop1 @@ -408,20 +581,46 @@ exit: ; Check that we can add Phis from different loops with different nesting, nested ; loop comes second. define void @test_07() { - -; CHECK-LABEL: Classifying expressions for: @test_07 -; CHECK: %s1 = add i32 %phi1, %phi2 -; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> -; CHECK: %s2 = add i32 %phi2, %phi1 -; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> -; CHECK: %s3 = add i32 %phi1, %phi3 -; CHECK-NEXT: --> {{\{\{}}40,+,3}<%loop3>,+,1}<%loop1> -; CHECK: %s4 = add i32 %phi3, %phi1 -; CHECK-NEXT: --> {{\{\{}}40,+,3}<%loop3>,+,1}<%loop1> -; CHECK: %s5 = add i32 %phi2, %phi3 -; CHECK-NEXT: --> {{\{\{}}50,+,3}<%loop3>,+,2}<%loop2> -; CHECK: %s6 = add i32 %phi3, %phi2 -; CHECK-NEXT: --> {{\{\{}}50,+,3}<%loop3>,+,2}<%loop2> +; CHECK-LABEL: 'test_07' +; CHECK-NEXT: Classifying expressions for: @test_07 +; CHECK-NEXT: %phi3 = phi i32 [ 30, %entry ], [ %phi3.inc, %loop3 ] +; CHECK-NEXT: --> {30,+,3}<%loop3> U: [30,1000) S: [30,1000) Exits: 999 LoopDispositions: { %loop3: Computable } +; CHECK-NEXT: %phi3.inc = add i32 %phi3, 3 +; CHECK-NEXT: --> {33,+,3}<%loop3> U: [33,1003) S: [33,1003) Exits: 1002 LoopDispositions: { %loop3: Computable } +; CHECK-NEXT: %phi1 = phi i32 [ 10, %loop3 ], [ %phi1.inc, %loop1.exit ] +; CHECK-NEXT: --> {10,+,1}<%loop1> U: [10,11) S: [10,11) Exits: 10 LoopDispositions: { %loop1: Computable, %loop2: Invariant } +; CHECK-NEXT: %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ] +; CHECK-NEXT: --> {20,+,2}<%loop2> U: [20,999) S: [20,999) Exits: 998 LoopDispositions: { %loop2: Computable, %loop1: Variant } +; CHECK-NEXT: %phi2.inc = add i32 %phi2, 2 +; CHECK-NEXT: --> {22,+,2}<%loop2> U: [22,1001) S: [22,1001) Exits: 1000 LoopDispositions: { %loop2: Computable, %loop1: Variant } +; CHECK-NEXT: %phi1.inc = add i32 %phi1, 1 +; CHECK-NEXT: --> {11,+,1}<%loop1> U: [11,12) S: [11,12) Exits: 11 LoopDispositions: { %loop1: Computable, %loop2: Invariant } +; CHECK-NEXT: %s1 = add i32 
%phi1, %phi2 +; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009) +; CHECK-NEXT: %s2 = add i32 %phi2, %phi1 +; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009) +; CHECK-NEXT: %s3 = add i32 %phi1, %phi3 +; CHECK-NEXT: --> {{\{\{}}40,+,3}<%loop3>,+,1}<%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010) +; CHECK-NEXT: %s4 = add i32 %phi3, %phi1 +; CHECK-NEXT: --> {{\{\{}}40,+,3}<%loop3>,+,1}<%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010) +; CHECK-NEXT: %s5 = add i32 %phi2, %phi3 +; CHECK-NEXT: --> {{\{\{}}50,+,3}<%loop3>,+,2}<%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: %s6 = add i32 %phi3, %phi2 +; CHECK-NEXT: --> {{\{\{}}50,+,3}<%loop3>,+,2}<%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: Determining loop execution counts for: @test_07 +; CHECK-NEXT: Loop %loop2: backedge-taken count is i32 489 +; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 489 +; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is i32 489 +; CHECK-NEXT: Loop %loop2: Trip multiple is 490 +; CHECK-NEXT: Loop %loop1: backedge-taken count is i32 0 +; CHECK-NEXT: Loop %loop1: constant max backedge-taken count is i32 0 +; CHECK-NEXT: Loop %loop1: symbolic max backedge-taken count is i32 0 +; CHECK-NEXT: Loop %loop1: Trip multiple is 1 +; CHECK-NEXT: Loop %loop3: backedge-taken count is i32 323 +; CHECK-NEXT: Loop %loop3: constant max backedge-taken count is i32 323 +; CHECK-NEXT: Loop %loop3: symbolic max backedge-taken count is i32 323 +; CHECK-NEXT: Loop %loop3: Trip multiple is 324 +; entry: br label %loop3 @@ -460,16 +659,50 @@ exit: ; Make sure that a complicated Phi does not get folded with rec's start value ; of a loop which is above. 
define void @test_08() { - -; CHECK-LABEL: Classifying expressions for: @test_08 -; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1 -; CHECK-NEXT: --> ({0,+,-1}<%loop_2> + %iv.2.1) -; CHECK: %tmp12 = trunc i64 %tmp11 to i32 -; CHECK-NEXT: --> ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) -; CHECK: %tmp14 = mul i32 %tmp12, %tmp7 -; CHECK-NEXT: --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) * {-1,+,-1}<%loop_1>) -; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1 -; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1) +; CHECK-LABEL: 'test_08' +; CHECK-NEXT: Classifying expressions for: @test_08 +; CHECK-NEXT: %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ] +; CHECK-NEXT: --> {2,+,1}<%loop_1> U: [2,4) S: [2,4) Exits: 3 LoopDispositions: { %loop_1: Computable } +; CHECK-NEXT: %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ] +; CHECK-NEXT: --> {-1,+,1}<%loop_1> U: [-1,1) S: [-1,1) Exits: 0 LoopDispositions: { %loop_1: Computable } +; CHECK-NEXT: %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1 +; CHECK-NEXT: --> {3,+,1}<%loop_1> U: [3,5) S: [3,5) Exits: 4 LoopDispositions: { %loop_1: Computable } +; CHECK-NEXT: %iv.1.2.next = add nsw i32 %iv.1.2, 1 +; CHECK-NEXT: --> {0,+,1}<%loop_1> U: [0,2) S: [0,2) Exits: 1 LoopDispositions: { %loop_1: Computable } +; CHECK-NEXT: %tmp6 = sub i64 1, %iv.1.1 +; CHECK-NEXT: --> {-1,+,-1}<%loop_1> U: [-2,0) S: [-2,0) --> -2 U: [-2,-1) S: [-2,-1) +; CHECK-NEXT: %tmp7 = trunc i64 %tmp6 to i32 +; CHECK-NEXT: --> {-1,+,-1}<%loop_1> U: [-2,0) S: [-2,0) --> -2 U: [-2,-1) S: [-2,-1) +; CHECK-NEXT: %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ] +; CHECK-NEXT: --> %iv.2.1 U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant } +; CHECK-NEXT: %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ] +; CHECK-NEXT: --> {0,+,-1}<%loop_2> U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop_2: Computable } +; CHECK-NEXT: %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, 
%loop_2 ] +; CHECK-NEXT: --> {2,+,1}<%loop_2> U: [2,3) S: [2,3) Exits: 2 LoopDispositions: { %loop_2: Computable } +; CHECK-NEXT: %tmp11 = add i64 %iv.2.2, %iv.2.1 +; CHECK-NEXT: --> ({0,+,-1}<%loop_2> + %iv.2.1) U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant } +; CHECK-NEXT: %tmp12 = trunc i64 %tmp11 to i32 +; CHECK-NEXT: --> ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant } +; CHECK-NEXT: %tmp14 = mul i32 %tmp12, %tmp7 +; CHECK-NEXT: --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) * {-1,+,-1}<%loop_1>) U: full-set S: full-set --> (-2 * ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>)) U: [0,-1) S: [-2147483648,2147483647) Exits: 0 LoopDispositions: { %loop_2: Variant } +; CHECK-NEXT: %tmp16 = mul i64 %iv.2.1, %iv.1.1 +; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1) U: full-set S: full-set --> (3 * %iv.2.1) U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant } +; CHECK-NEXT: %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1 +; CHECK-NEXT: --> {3,+,1}<%loop_2> U: [3,4) S: [3,4) Exits: 3 LoopDispositions: { %loop_2: Computable } +; CHECK-NEXT: %iv.2.2.next = add nsw i64 %iv.2.2, -1 +; CHECK-NEXT: --> {-1,+,-1}<%loop_2> U: [-1,0) S: [-1,0) Exits: -1 LoopDispositions: { %loop_2: Computable } +; CHECK-NEXT: %tmp10 = add i32 %iv.1.2, 3 +; CHECK-NEXT: --> {2,+,1}<%loop_1> U: [2,4) S: [2,4) --> 3 U: [3,4) S: [3,4) +; CHECK-NEXT: Determining loop execution counts for: @test_08 +; CHECK-NEXT: Loop %loop_2: backedge-taken count is i64 0 +; CHECK-NEXT: Loop %loop_2: constant max backedge-taken count is i64 0 +; CHECK-NEXT: Loop %loop_2: symbolic max backedge-taken count is i64 0 +; CHECK-NEXT: Loop %loop_2: Trip multiple is 1 +; CHECK-NEXT: Loop %loop_1: backedge-taken count is i64 1 +; CHECK-NEXT: Loop %loop_1: constant max backedge-taken count is i64 1 +; CHECK-NEXT: Loop %loop_1: symbolic max backedge-taken count is i64 1 +; CHECK-NEXT: Loop %loop_1: Trip multiple 
is 2 +; entry: br label %loop_1 @@ -515,22 +748,36 @@ exit: } define i64 @test_09(i32 %param) { - -; CHECK-LABEL: Classifying expressions for: @test_09 -; CHECK: %iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %outer.loop ] -; CHECK-NEXT: --> {0,+,1}<%loop1> -; CHECK: %iv1.trunc = trunc i64 %iv1 to i32 -; CHECK-NEXT: --> {0,+,1}<%loop1> -; CHECK: %iv1.next = add nuw nsw i64 %iv1, 1 -; CHECK-NEXT: --> {1,+,1}<%loop1> -; CHECK: %iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ] -; CHECK-NEXT: --> {%param,+,1}<%loop2> -; CHECK: %iv2.next = add i32 %iv2, 1 -; CHECK-NEXT: --> {(1 + %param),+,1}<%loop2> -; CHECK: %iv2.ext = sext i32 %iv2.next to i64 -; CHECK-NEXT: --> (sext i32 {(1 + %param),+,1}<%loop2> to i64) -; CHECK: %ret = mul i64 %iv1, %iv2.ext -; CHECK-NEXT: --> ((sext i32 {(1 + %param),+,1}<%loop2> to i64) * {0,+,1}<%loop1>) +; CHECK-LABEL: 'test_09' +; CHECK-NEXT: Classifying expressions for: @test_09 +; CHECK-NEXT: %iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %outer.loop ] +; CHECK-NEXT: --> {0,+,1}<%loop1> U: [0,3) S: [0,3) Exits: 2 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %iv1.trunc = trunc i64 %iv1 to i32 +; CHECK-NEXT: --> {0,+,1}<%loop1> U: [0,3) S: [0,3) Exits: 2 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %iv1.next = add nuw nsw i64 %iv1, 1 +; CHECK-NEXT: --> {1,+,1}<%loop1> U: [1,4) S: [1,4) Exits: 3 LoopDispositions: { %loop1: Computable } +; CHECK-NEXT: %iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ] +; CHECK-NEXT: --> {%param,+,1}<%loop2> U: full-set S: full-set Exits: (2 smax %param) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %iv2.next = add i32 %iv2, 1 +; CHECK-NEXT: --> {(1 + %param),+,1}<%loop2> U: full-set S: full-set Exits: (1 + (2 smax %param)) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %iv2.ext = sext i32 %iv2.next to i64 +; CHECK-NEXT: --> (sext i32 {(1 + %param),+,1}<%loop2> to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) --> (sext i32 (1 + 
(2 smax %param)) to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) +; CHECK-NEXT: %ret = mul i64 %iv1, %iv2.ext +; CHECK-NEXT: --> ((sext i32 {(1 + %param),+,1}<%loop2> to i64) * {0,+,1}<%loop1>) U: [-4294967296,4294967295) S: [-4294967296,4294967295) --> (2 * (sext i32 (1 + (2 smax %param)) to i64)) U: [0,-1) S: [-4294967296,4294967295) +; CHECK-NEXT: Determining loop execution counts for: @test_09 +; CHECK-NEXT: Loop %loop2: backedge-taken count is ((-1 * %param) + (2 smax %param)) +; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 -2147483646 +; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is ((-1 * %param) + (2 smax %param)) +; CHECK-NEXT: Loop %loop2: Trip multiple is 1 +; CHECK-NEXT: Loop %loop1: backedge-taken count is i64 2 +; CHECK-NEXT: exit count for loop1: i64 100 +; CHECK-NEXT: exit count for guarded: i32 2 +; CHECK-NEXT: Loop %loop1: constant max backedge-taken count is i64 2 +; CHECK-NEXT: Loop %loop1: symbolic max backedge-taken count is i64 2 +; CHECK-NEXT: symbolic max exit count for loop1: i64 100 +; CHECK-NEXT: symbolic max exit count for guarded: i32 2 +; CHECK-NEXT: Loop %loop1: Trip multiple is 1 +; entry: br label %outer.loop @@ -568,26 +815,47 @@ exit: ; preds = %loop2.exit } define i64 @test_10(i32 %param) { - -; CHECK-LABEL: Classifying expressions for: @test_10 -; CHECK: %uncle = phi i64 [ %uncle.outer.next, %uncle.loop.backedge ], [ 0, %outer.loop ] -; CHECK-NEXT: --> {0,+,1}<%uncle.loop> -; CHECK: %iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %uncle.loop ] -; CHECK-NEXT: --> {0,+,1}<%loop1> -; CHECK: %iv1.trunc = trunc i64 %iv1 to i32 -; CHECK-NEXT: --> {0,+,1}<%loop1> -; CHECK: %iv1.next = add nuw nsw i64 %iv1, 1 -; CHECK-NEXT: --> {1,+,1}<%loop1> -; CHECK: %uncle.outer.next = add i64 %uncle, 1 -; CHECK-NEXT: --> {1,+,1}<%uncle.loop> -; CHECK: %iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ] -; CHECK-NEXT: --> {%param,+,1}<%loop2> -; CHECK: %iv2.next = add i32 %iv2, 1 -; 
CHECK-NEXT: --> {(1 + %param),+,1}<%loop2> -; CHECK: %iv2.ext = sext i32 %iv2.next to i64 -; CHECK-NEXT: --> (sext i32 {(1 + %param),+,1}<%loop2> to i64) -; CHECK: %ret = mul i64 %iv1, %iv2.ext -; CHECK-NEXT: --> ((sext i32 {(1 + %param),+,1}<%loop2> to i64) * {0,+,1}<%loop1>) +; CHECK-LABEL: 'test_10' +; CHECK-NEXT: Classifying expressions for: @test_10 +; CHECK-NEXT: %uncle = phi i64 [ %uncle.outer.next, %uncle.loop.backedge ], [ 0, %outer.loop ] +; CHECK-NEXT: --> {0,+,1}<%uncle.loop> U: [0,1) S: [0,1) Exits: <> LoopDispositions: { %uncle.loop: Computable, %loop1: Invariant } +; CHECK-NEXT: %iv1 = phi i64 [ %iv1.next, %guarded ], [ 0, %uncle.loop ] +; CHECK-NEXT: --> {0,+,1}<%loop1> U: [0,3) S: [0,3) Exits: 2 LoopDispositions: { %loop1: Computable, %uncle.loop: Variant } +; CHECK-NEXT: %iv1.trunc = trunc i64 %iv1 to i32 +; CHECK-NEXT: --> {0,+,1}<%loop1> U: [0,3) S: [0,3) Exits: 2 LoopDispositions: { %loop1: Computable, %uncle.loop: Variant } +; CHECK-NEXT: %iv1.next = add nuw nsw i64 %iv1, 1 +; CHECK-NEXT: --> {1,+,1}<%loop1> U: [1,4) S: [1,4) Exits: 3 LoopDispositions: { %loop1: Computable, %uncle.loop: Variant } +; CHECK-NEXT: %uncle.outer.next = add i64 %uncle, 1 +; CHECK-NEXT: --> {1,+,1}<%uncle.loop> U: [1,2) S: [1,2) Exits: <> LoopDispositions: { %uncle.loop: Computable, %loop1: Invariant } +; CHECK-NEXT: %iv2 = phi i32 [ %iv2.next, %loop2 ], [ %param, %loop2.preheader ] +; CHECK-NEXT: --> {%param,+,1}<%loop2> U: full-set S: full-set Exits: (2 smax %param) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %iv2.next = add i32 %iv2, 1 +; CHECK-NEXT: --> {(1 + %param),+,1}<%loop2> U: full-set S: full-set Exits: (1 + (2 smax %param)) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: %iv2.ext = sext i32 %iv2.next to i64 +; CHECK-NEXT: --> (sext i32 {(1 + %param),+,1}<%loop2> to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) --> (sext i32 (1 + (2 smax %param)) to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) +; 
CHECK-NEXT: %ret = mul i64 %iv1, %iv2.ext +; CHECK-NEXT: --> ((sext i32 {(1 + %param),+,1}<%loop2> to i64) * {0,+,1}<%loop1>) U: [-4294967296,4294967295) S: [-4294967296,4294967295) --> (2 * (sext i32 (1 + (2 smax %param)) to i64)) U: [0,-1) S: [-4294967296,4294967295) +; CHECK-NEXT: Determining loop execution counts for: @test_10 +; CHECK-NEXT: Loop %loop2: backedge-taken count is ((-1 * %param) + (2 smax %param)) +; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 -2147483646 +; CHECK-NEXT: Loop %loop2: symbolic max backedge-taken count is ((-1 * %param) + (2 smax %param)) +; CHECK-NEXT: Loop %loop2: Trip multiple is 1 +; CHECK-NEXT: Loop %loop1: backedge-taken count is i64 2 +; CHECK-NEXT: exit count for loop1: i64 100 +; CHECK-NEXT: exit count for guarded: i32 2 +; CHECK-NEXT: Loop %loop1: constant max backedge-taken count is i64 2 +; CHECK-NEXT: Loop %loop1: symbolic max backedge-taken count is i64 2 +; CHECK-NEXT: symbolic max exit count for loop1: i64 100 +; CHECK-NEXT: symbolic max exit count for guarded: i32 2 +; CHECK-NEXT: Loop %loop1: Trip multiple is 1 +; CHECK-NEXT: Loop %uncle.loop: Unpredictable backedge-taken count. 
+; CHECK-NEXT: exit count for loop1: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: exit count for uncle.loop.backedge: i64 0 +; CHECK-NEXT: Loop %uncle.loop: constant max backedge-taken count is i64 0 +; CHECK-NEXT: Loop %uncle.loop: symbolic max backedge-taken count is i64 0 +; CHECK-NEXT: symbolic max exit count for loop1: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: symbolic max exit count for uncle.loop.backedge: i64 0 +; entry: br label %outer.loop diff --git a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll index 64fb5b36b2c62..1ed6a273338ab 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll @@ -240,6 +240,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .section .drectve,"yni" ; CHECK-NEXT: .ascii " /EXPORT:exp" +; CHECK-NEXT: .def "EXP+#func"; +; CHECK-NEXT: .scl 2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .def func; ; CHECK-NEXT: .scl 2; ; CHECK-NEXT: .type 32; @@ -252,6 +256,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#func", "#func$hybpatch_thunk"{{$}} +; CHECK-NEXT: .def "EXP+#has_varargs"; +; CHECK-NEXT: .scl 2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .def has_varargs; ; CHECK-NEXT: .scl 2; ; CHECK-NEXT: .type 32; @@ -264,6 +272,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#has_varargs", "#has_varargs$hybpatch_thunk" +; CHECK-NEXT: .def "EXP+#has_sret"; +; CHECK-NEXT: .scl 2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef ; CHECK-NEXT: .def has_sret; ; CHECK-NEXT: .scl 2; ; CHECK-NEXT: .type 32; @@ -276,6 +288,10 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: .type 32; ; CHECK-NEXT: .endef ; CHECK-NEXT: .set "#has_sret", "#has_sret$hybpatch_thunk" +; CHECK-NEXT: .def "EXP+#exp"; +; CHECK-NEXT: .scl 2; +; CHECK-NEXT: .type 32; +; 
CHECK-NEXT: .endef ; CHECK-NEXT: .def exp; ; CHECK-NEXT: .scl 2; ; CHECK-NEXT: .type 32; @@ -295,18 +311,18 @@ define dso_local void @caller() nounwind { ; SYM: [78](sec 20)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x00000000 #exp$hybpatch_thunk ; SYM: [110](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x00000000 func ; SYM-NEXT: AUX indx 112 srch 3 -; SYM-NEXT: [112](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 EXP+#func +; SYM-NEXT: [112](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x00000000 EXP+#func ; SYM: [116](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x00000000 #func ; SYM-NEXT: AUX indx 53 srch 3 ; SYM: [122](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x00000000 has_varargs ; SYM-NEXT: AUX indx 124 srch 3 -; SYM-NEXT: [124](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 EXP+#has_varargs +; SYM-NEXT: [124](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x00000000 EXP+#has_varargs ; SYM-NEXT: [125](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x00000000 has_sret ; SYM-NEXT: AUX indx 127 srch 3 -; SYM-NEXT: [127](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 EXP+#has_sret +; SYM-NEXT: [127](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x00000000 EXP+#has_sret ; SYM-NEXT: [128](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x00000000 exp ; SYM-NEXT: AUX indx 130 srch 3 -; SYM-NEXT: [130](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 EXP+#exp +; SYM-NEXT: [130](sec 0)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x00000000 EXP+#exp ; SYM-NEXT: [131](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x00000000 #has_varargs ; SYM-NEXT: AUX indx 58 srch 3 ; SYM-NEXT: [133](sec 0)(fl 0x00)(ty 0)(scl 69) (nx 1) 0x00000000 #has_sret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll index 1c8a8d635274e..dcc11609ca231 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll @@ -229,8 +229,8 @@ define 
%"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov z7.d, z0.d ; CHECK-NEXT: add x2, x2, x11 -; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0 -; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b +; CHECK-NEXT: and z2.d, z2.d, #0xffffffff +; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; CHECK-NEXT: zip2 p2.d, p1.d, p1.d ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl] diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll index 66d670d0b796b..cdf2a962f9322 100644 --- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll +++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll @@ -319,9 +319,8 @@ define i32 @ctz_nxv16i1_poison( %a) { define i32 @ctz_and_nxv16i1( %pg, %a, %b) { ; CHECK-LABEL: ctz_and_nxv16i1: ; CHECK: // %bb.0: +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b -; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b ; CHECK-NEXT: brkb p0.b, p1/z, p0.b ; CHECK-NEXT: cntp x0, p0, p0.b ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll index 35cbe65c6a8b8..fc5e640aed4ae 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll @@ -544,3 +544,119 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero( %pg, %cmp, 1 ret %svboolx2 %ins.2 } + +define @logical_and_oeq_zero_pred( %pg, %x) { +; CHECK-LABEL: logical_and_oeq_zero_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ret + %y = fcmp oeq %x, zeroinitializer + %z = select %pg, %y, zeroinitializer + ret %z +} + +define @logical_and_ogt_zero_pred( %pg, %x) { +; CHECK-LABEL: logical_and_ogt_zero_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ret + %y = fcmp ogt %x, zeroinitializer + %z = select %pg, %y, 
zeroinitializer + ret %z +} + +define @logical_and_oge_zero_pred( %pg, %x) { +; CHECK-LABEL: logical_and_oge_zero_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ret + %y = fcmp oge %x, zeroinitializer + %z = select %pg, %y, zeroinitializer + ret %z +} + +define @logical_and_olt_zero_pred( %pg, %x) { +; CHECK-LABEL: logical_and_olt_zero_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp olt %x, zeroinitializer + %z = select %pg, %y, zeroinitializer + ret %z +} + +define @logical_and_ole_zero_pred( %pg, %x) { +; CHECK-LABEL: logical_and_ole_zero_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ret + %y = fcmp ole %x, zeroinitializer + %z = select %pg, %y, zeroinitializer + ret %z +} + +define @logical_and_une_zero_pred( %pg, %x) { +; CHECK-LABEL: logical_and_une_zero_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: ret + %y = fcmp une %x, zeroinitializer + %z = select %pg, %y, zeroinitializer + ret %z +} + +define %svboolx2 @logical_and_of_multiuse_fcmp_ogt( %pg, %x, %y) { +; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, z1.s +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b +; CHECK-NEXT: ret + %cmp = fcmp ogt %x, %y + %and = select %pg, %cmp, zeroinitializer + %ins.1 = insertvalue %svboolx2 poison, %and, 0 + %ins.2 = insertvalue %svboolx2 %ins.1, %cmp, 1 + ret %svboolx2 %ins.2 +} + +define %svboolx2 @logical_and_of_multiuse_fcmp_ogt_zero( %pg, %x) { +; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, #0.0 +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b +; CHECK-NEXT: ret + %cmp = fcmp ogt %x, zeroinitializer + %and = select %pg, %cmp, zeroinitializer + %ins.1 = insertvalue %svboolx2 poison, %and, 0 + %ins.2 = insertvalue %svboolx2 %ins.1, %cmp, 1 + 
ret %svboolx2 %ins.2 +} + +define %svboolx2 @logical_and_of_multiuse_fcmp_olt( %pg, %x, %y) { +; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: fcmgt p1.s, p1/z, z1.s, z0.s +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b +; CHECK-NEXT: ret + %cmp = fcmp olt %x, %y + %and = select %pg, %cmp, zeroinitializer + %ins.1 = insertvalue %svboolx2 poison, %and, 0 + %ins.2 = insertvalue %svboolx2 %ins.1, %cmp, 1 + ret %svboolx2 %ins.2 +} + +define %svboolx2 @logical_and_of_multiuse_fcmp_olt_zero( %pg, %x) { +; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0 +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b +; CHECK-NEXT: ret + %cmp = fcmp olt %x, zeroinitializer + %and = select %pg, %cmp, zeroinitializer + %ins.1 = insertvalue %svboolx2 poison, %and, 0 + %ins.2 = insertvalue %svboolx2 %ins.1, %cmp, 1 + ret %svboolx2 %ins.2 +} diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll index 0d7f230062650..afe13851f0b95 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll @@ -24,8 +24,7 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, %i37, < ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s ; CHECK-NEXT: add z0.d, z2.d, z1.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b +; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b ; CHECK-NEXT: mov z0.d, p2/m, z2.d ; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d ; CHECK-NEXT: uaddv d0, p0, z0.d diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index c896bfe925ed8..1b1ea52520c0b 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -279,15 +279,15 @@ ; GCN-O1-NEXT: AMDGPU Rewrite Undef for PHI ; GCN-O1-NEXT: LCSSA Verifier ; 
GCN-O1-NEXT: Loop-Closed SSA Form Pass +; GCN-O1-NEXT: DummyCGSCCPass +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: ObjC ARC contraction -; GCN-O1-NEXT: DummyCGSCCPass -; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Prepare callbr ; GCN-O1-NEXT: Safe Stack instrumentation pass ; GCN-O1-NEXT: Insert stack protectors -; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Cycle Info Analysis ; GCN-O1-NEXT: Uniformity Analysis ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) @@ -574,15 +574,15 @@ ; GCN-O1-OPTS-NEXT: AMDGPU Rewrite Undef for PHI ; GCN-O1-OPTS-NEXT: LCSSA Verifier ; GCN-O1-OPTS-NEXT: Loop-Closed SSA Form Pass +; GCN-O1-OPTS-NEXT: DummyCGSCCPass +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-OPTS-NEXT: Function Alias Analysis Results ; GCN-O1-OPTS-NEXT: ObjC ARC contraction -; GCN-O1-OPTS-NEXT: DummyCGSCCPass -; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Prepare callbr ; GCN-O1-OPTS-NEXT: Safe Stack instrumentation pass ; GCN-O1-OPTS-NEXT: Insert stack protectors -; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Uniformity Analysis ; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) @@ -882,17 +882,15 @@ ; GCN-O2-NEXT: LCSSA Verifier ; GCN-O2-NEXT: Loop-Closed SSA Form Pass ; GCN-O2-NEXT: Analysis if a function is memory bound +; GCN-O2-NEXT: DummyCGSCCPass ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: ObjC ARC contraction -; GCN-O2-NEXT: DummyCGSCCPass -; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Prepare callbr ; GCN-O2-NEXT: Safe Stack 
instrumentation pass ; GCN-O2-NEXT: Insert stack protectors -; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Cycle Info Analysis ; GCN-O2-NEXT: Uniformity Analysis ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) @@ -1205,17 +1203,15 @@ ; GCN-O3-NEXT: LCSSA Verifier ; GCN-O3-NEXT: Loop-Closed SSA Form Pass ; GCN-O3-NEXT: Analysis if a function is memory bound +; GCN-O3-NEXT: DummyCGSCCPass ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: ObjC ARC contraction -; GCN-O3-NEXT: DummyCGSCCPass -; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Prepare callbr ; GCN-O3-NEXT: Safe Stack instrumentation pass ; GCN-O3-NEXT: Insert stack protectors -; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Cycle Info Analysis ; GCN-O3-NEXT: Uniformity Analysis ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll index 06b31657e0eca..a4d58985b75de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll @@ -1501,18 +1501,23 @@ define @vwadd_vx_splat_zext_i1( %va, i16 %b) ; RV32: # %bb.0: ; RV32-NEXT: slli a0, a0, 16 ; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32-NEXT: vmv.v.x v8, a0 -; RV32-NEXT: vadd.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a0, a0, 1 +; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: vwadd_vx_splat_zext_i1: ; RV64: # %bb.0: ; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vadd.vi v8, v8, 1, v0.t +; RV64-NEXT: li a0, 1 
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vwaddu.vx v8, v12, a0, v0.t ; RV64-NEXT: ret %zb = zext i16 %b to i32 %head = insertelement poison, i32 %zb, i32 0 @@ -1570,20 +1575,23 @@ define @vwadd_vx_splat_sext_i1( %va, i16 %b) ; RV32: # %bb.0: ; RV32-NEXT: slli a0, a0, 16 ; RV32-NEXT: srai a0, a0, 16 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32-NEXT: vmv.v.x v8, a0 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsub.vx v8, v8, a0, v0.t +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: vwadd_vx_splat_sext_i1: ; RV64: # %bb.0: ; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srai a0, a0, 48 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsub.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vwsub.vx v8, v12, a0, v0.t ; RV64-NEXT: ret %sb = sext i16 %b to i32 %head = insertelement poison, i32 %sb, i32 0 diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll index 6eb748a1afbab..b35a1b72fcb6f 100644 --- a/llvm/test/CodeGen/X86/cttz.ll +++ b/llvm/test/CodeGen/X86/cttz.ll @@ -317,13 +317,11 @@ define i32 @cttz_i32_zero_test(i32 %n) { ; ; X64-LABEL: cttz_i32_zero_test: ; X64: # %bb.0: -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB6_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB6_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq ; ; X86-CLZ-LABEL: cttz_i32_zero_test: diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll 
b/llvm/test/CodeGen/X86/known-never-zero.ll index df11a44626e38..d5d604a138a71 100644 --- a/llvm/test/CodeGen/X86/known-never-zero.ll +++ b/llvm/test/CodeGen/X86/known-never-zero.ll @@ -54,13 +54,12 @@ define i32 @or_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: or_maybe_zero: ; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: orl %esi, %edi -; X64-NEXT: je .LBB1_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB1_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = or i32 %x, %y %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -115,13 +114,10 @@ define i32 @select_maybe_zero(i1 %c, i32 %x) { ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: testb $1, %dil ; X64-NEXT: cmovnel %esi, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB3_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB3_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %y = or i32 %x, 1 %z = select i1 %c, i32 %y, i32 0 @@ -216,16 +212,14 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: shl_maybe_zero: ; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: movl %edi, %ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %esi -; X64-NEXT: testl %esi, %esi -; X64-NEXT: je .LBB7_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %esi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB7_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax 
killed $rax ; X64-NEXT: retq %z = shl nuw nsw i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -275,13 +269,10 @@ define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) { ; X64-NEXT: addl %esi, %edi ; X64-NEXT: movl $-1, %eax ; X64-NEXT: cmovael %edi, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB9_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -334,15 +325,13 @@ define i32 @umax_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: umax_maybe_zero: ; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: cmpl %esi, %edi ; X64-NEXT: cmoval %edi, %esi -; X64-NEXT: testl %esi, %esi -; X64-NEXT: je .LBB11_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %esi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB11_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = call i32 @llvm.umax.i32(i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -401,13 +390,10 @@ define i32 @umin_maybe_zero(i32 %x, i32 %y) { ; X64-NEXT: cmpl $54, %edi ; X64-NEXT: movl $54, %eax ; X64-NEXT: cmovbl %edi, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB13_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB13_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = call i32 
@llvm.umin.i32(i32 %x, i32 54) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -522,13 +508,10 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) { ; X64-NEXT: cmpl $54, %edi ; X64-NEXT: movl $54, %eax ; X64-NEXT: cmovll %edi, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB17_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB17_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = call i32 @llvm.smin.i32(i32 %x, i32 54) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -643,13 +626,10 @@ define i32 @smax_known_zero(i32 %x, i32 %y) { ; X64-NEXT: testl %edi, %edi ; X64-NEXT: movl $-1, %eax ; X64-NEXT: cmovnsl %edi, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB21_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB21_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = call i32 @llvm.smax.i32(i32 %x, i32 -1) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -676,16 +656,9 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) { ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: orl $256, %edi # imm = 0x100 -; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %eax -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB22_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB22_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: rorl %cl, %edi +; X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %x = or i32 %xx, 256 %shr = lshr i32 %x, %y @@ -714,16 +687,13 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: 
rotr_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %eax -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB23_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB23_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: rorl %cl, %edi +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %shr = lshr i32 %x, %y %sub = sub i32 32, %y @@ -775,16 +745,13 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotr_with_fshr_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %eax -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB25_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB25_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: rorl %cl, %edi +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -811,16 +778,9 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) { ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: orl $256, %edi # imm = 0x100 -; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %eax -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB26_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB26_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: roll %cl, %edi +; 
X64-NEXT: rep bsfl %edi, %eax ; X64-NEXT: retq %x = or i32 %xx, 256 %shl = shl i32 %x, %y @@ -849,16 +809,13 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotl_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %eax -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB27_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB27_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: roll %cl, %edi +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %shl = shl i32 %x, %y %sub = sub i32 32, %y @@ -910,16 +867,13 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotl_with_fshl_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %eax -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB29_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB29_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: roll %cl, %edi +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -989,16 +943,14 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: sra_maybe_zero: ; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: movl %edi, %ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: sarl %cl, %esi -; X64-NEXT: testl %esi, %esi -; X64-NEXT: je 
.LBB32_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %esi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB32_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = ashr exact i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1068,16 +1020,14 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: srl_maybe_zero: ; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: movl %edi, %ecx ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrl %cl, %esi -; X64-NEXT: testl %esi, %esi -; X64-NEXT: je .LBB35_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %esi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB35_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = lshr exact i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1128,13 +1078,11 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divl %esi -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB37_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB37_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: # kill: def $eax killed $eax def $rax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = udiv exact i32 %x, %y %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1185,13 +1133,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %esi -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB39_1 -; 
X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB39_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: # kill: def $eax killed $eax def $rax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = sdiv exact i32 %x, %y %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1235,14 +1181,13 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) { ; ; X64-LABEL: add_maybe_zero: ; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: orl $1, %edi ; X64-NEXT: addl %esi, %edi -; X64-NEXT: je .LBB41_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB41_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %x = or i32 %xx, 1 %z = add nsw i32 %x, %y @@ -1321,12 +1266,10 @@ define i32 @sub_maybe_zero(i32 %x) { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: orl $64, %eax ; X64-NEXT: subl %edi, %eax -; X64-NEXT: je .LBB44_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB44_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %y = or i32 %x, 64 %z = sub i32 %y, %x @@ -1349,13 +1292,12 @@ define i32 @sub_maybe_zero2(i32 %x) { ; ; X64-LABEL: sub_maybe_zero2: ; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: negl %edi -; X64-NEXT: je .LBB45_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB45_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 
0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = sub i32 0, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1379,15 +1321,13 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) { ; ; X64-LABEL: mul_known_nonzero_nsw: ; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: orl $256, %esi # imm = 0x100 ; X64-NEXT: imull %edi, %esi -; X64-NEXT: testl %esi, %esi -; X64-NEXT: je .LBB46_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %esi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB46_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %y = or i32 %yy, 256 %z = mul nsw i32 %y, %x @@ -1412,15 +1352,13 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) { ; ; X64-LABEL: mul_known_nonzero_nuw: ; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: orl $256, %esi # imm = 0x100 ; X64-NEXT: imull %edi, %esi -; X64-NEXT: testl %esi, %esi -; X64-NEXT: je .LBB47_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %esi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB47_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000 +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %y = or i32 %yy, 256 %z = mul nuw i32 %y, %x @@ -1444,14 +1382,12 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) { ; ; X64-LABEL: mul_maybe_zero: ; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: imull %esi, %edi -; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB48_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB48_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rax 
# imm = 0x100000000 +; X64-NEXT: orq %rdi, %rax +; X64-NEXT: rep bsfq %rax, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = mul nuw nsw i32 %y, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1482,9 +1418,10 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) { ; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u] ; X64-NEXT: vmovd %xmm0, %eax -; X64-NEXT: bsfl %eax, %ecx -; X64-NEXT: movl $32, %eax -; X64-NEXT: cmovnel %ecx, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %x = shl nuw nsw <2 x i16> , %xx %z = bitcast <2 x i16> %x to i32 @@ -1508,13 +1445,10 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) { ; X64-LABEL: bitcast_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: vmovd %xmm0, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB50_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB50_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = bitcast <2 x i16> %x to i32 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1538,13 +1472,10 @@ define i32 @bitcast_from_float(float %x) { ; X64-LABEL: bitcast_from_float: ; X64: # %bb.0: ; X64-NEXT: vmovd %xmm0, %eax -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB51_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB51_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = bitcast float %x to i32 %r = call i32 
@llvm.cttz.i32(i32 %z, i1 false) @@ -1592,14 +1523,11 @@ define i32 @zext_maybe_zero(i16 %x) { ; ; X64-LABEL: zext_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: testw %di, %di -; X64-NEXT: je .LBB53_1 -; X64-NEXT: # %bb.2: # %cond.false ; X64-NEXT: movzwl %di, %eax -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB53_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = zext i16 %x to i32 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -1646,14 +1574,11 @@ define i32 @sext_maybe_zero(i16 %x) { ; ; X64-LABEL: sext_maybe_zero: ; X64: # %bb.0: -; X64-NEXT: testw %di, %di -; X64-NEXT: je .LBB55_1 -; X64-NEXT: # %bb.2: # %cond.false ; X64-NEXT: movswl %di, %eax -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB55_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NEXT: retq %z = sext i16 %x to i32 %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) diff --git a/llvm/test/CodeGen/X86/pr89877.ll b/llvm/test/CodeGen/X86/pr89877.ll index 9820ec42f5b8c..fdbe75b467d99 100644 --- a/llvm/test/CodeGen/X86/pr89877.ll +++ b/llvm/test/CodeGen/X86/pr89877.ll @@ -24,14 +24,11 @@ define i32 @sext_known_nonzero(i16 %xx) { ; X64-NEXT: movl $256, %eax # imm = 0x100 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shll %cl, %eax -; X64-NEXT: cwtl -; X64-NEXT: testl %eax, %eax -; X64-NEXT: je .LBB0_1 -; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %eax, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB0_1: -; X64-NEXT: movl $32, %eax +; X64-NEXT: movswq %ax, %rax +; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: rep bsfq %rcx, %rax +; X64-NEXT: # kill: def $eax killed 
$eax killed $rax ; X64-NEXT: retq %x = shl i16 256, %xx %z = sext i16 %x to i32 diff --git a/llvm/test/CodeGen/X86/pr90847.ll b/llvm/test/CodeGen/X86/pr90847.ll index 7aa0ceb26e1ac..f2d43c3ed8d5b 100644 --- a/llvm/test/CodeGen/X86/pr90847.ll +++ b/llvm/test/CodeGen/X86/pr90847.ll @@ -15,14 +15,10 @@ define i32 @PR90847(<8 x float> %x) nounwind { ; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1 ; AVX1-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax -; AVX1-NEXT: testl %eax, %eax -; AVX1-NEXT: je .LBB0_1 -; AVX1-NEXT: # %bb.2: # %cond.false -; AVX1-NEXT: rep bsfl %eax, %eax -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; AVX1-NEXT: .LBB0_1: -; AVX1-NEXT: movl $32, %eax +; AVX1-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; AVX1-NEXT: orq %rax, %rcx +; AVX1-NEXT: rep bsfq %rcx, %rax +; AVX1-NEXT: # kill: def $eax killed $eax killed $rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -36,14 +32,10 @@ define i32 @PR90847(<8 x float> %x) nounwind { ; AVX2-NEXT: vminps %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: testl %eax, %eax -; AVX2-NEXT: je .LBB0_1 -; AVX2-NEXT: # %bb.2: # %cond.false -; AVX2-NEXT: rep bsfl %eax, %eax -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; AVX2-NEXT: .LBB0_1: -; AVX2-NEXT: movl $32, %eax +; AVX2-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000 +; AVX2-NEXT: orq %rax, %rcx +; AVX2-NEXT: rep bsfq %rcx, %rax +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq entry: diff --git a/llvm/test/Transforms/GlobalMerge/private-global.ll b/llvm/test/Transforms/GlobalMerge/private-global.ll new file mode 100644 index 0000000000000..c4152a242d59f --- /dev/null +++ b/llvm/test/Transforms/GlobalMerge/private-global.ll @@ -0,0 +1,36 @@ +; RUN: opt -global-merge -global-merge-max-offset=100 -S -o - %s | FileCheck %s +; RUN: opt -passes='global-merge' -S -o - %s | FileCheck %s + +; NOTE: This is a copy of the 
llvm/test/Transforms/GlobalMerge/basic.ll test, +; using `private` global variables instead of `internal`. This is to show that +; that private globals can be merged in the GlobalMerge pass. + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: @_MergedGlobals = private global <{ i32, i32 }> <{ i32 1, i32 2 }>, align 4 +; CHECK: @_MergedGlobals.1 = private global <{ i32, i32 }> <{ i32 3, i32 4 }>, section "foo", align 4 + +; CHECK-DAG: @a = private alias i32, ptr @_MergedGlobals{{$}} +@a = private global i32 1 + +; CHECK-DAG: @b = private alias i32, getelementptr inbounds (<{ i32, i32 }>, ptr @_MergedGlobals, i32 0, i32 1) +@b = private global i32 2 + +; CHECK-DAG: @c = private alias i32, ptr @_MergedGlobals.1{{$}} +@c = private global i32 3, section "foo" + +; CHECK-DAG: @d = private alias i32, getelementptr inbounds (<{ i32, i32 }>, ptr @_MergedGlobals.1, i32 0, i32 1) +@d = private global i32 4, section "foo" + +define void @use_private() { + ; CHECK: load i32, ptr @_MergedGlobals, + %x = load i32, ptr @a + ; CHECK: load i32, ptr getelementptr inbounds (<{ i32, i32 }>, ptr @_MergedGlobals, i32 0, i32 1) + %y = load i32, ptr @b + ; CHECK: load i32, ptr @_MergedGlobals.1 + %z1 = load i32, ptr @c + ; CHECK: load i32, ptr getelementptr inbounds (<{ i32, i32 }>, ptr @_MergedGlobals.1, i32 0, i32 1) + %z2 = load i32, ptr @d + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index 4a72a5ff8395d..31ee107c81cd4 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -222,3 +222,77 @@ entry: %cmp = icmp ugt <4 x i32> %vbsl, ret <4 x i1> %cmp } + +define void @test7() { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> 
[[TMP1]], <8 x i64> zeroinitializer, i64 8) +; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i64> [[TMP2]] to <16 x i16> +; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr null, align 2 +; CHECK-NEXT: ret void +; + %1 = getelementptr i8, ptr null, i64 16 + %2 = trunc <8 x i64> zeroinitializer to <8 x i16> + store <8 x i16> %2, ptr %1, align 2 + %3 = trunc <8 x i64> zeroinitializer to <8 x i16> + store <8 x i16> %3, ptr null, align 2 + ret void +} + +define void @test8() { +; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> poison, <2 x float> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP0]], <2 x float> zeroinitializer, i64 2) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP1]], <2 x float> zeroinitializer, i64 4) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP2]], <2 x float> zeroinitializer, i64 6) +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> zeroinitializer, i64 2) +; CHECK-NEXT: br i1 false, label [[FOR0:%.*]], label [[FOR_BODY:%.*]] +; CHECK: for0: +; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x float> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP5]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP8]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: br i1 false, label [[FOR0]], label [[FOR_BODY]] +; +entry: + br i1 false, label %for0, label %for.body + +for0: + %0 = phi <2 x float> [ zeroinitializer, %entry ], [ %4, %for.body ] + %1 = phi <2 x float> [ zeroinitializer, 
%entry ], [ %5, %for.body ] + %2 = phi <2 x float> [ zeroinitializer, %entry ], [ %4, %for.body ] + %3 = phi <2 x float> [ zeroinitializer, %entry ], [ %5, %for.body ] + ret void + +for.body: + %4 = phi <2 x float> [ %4, %for.body ], [ zeroinitializer, %entry ] + %5 = phi <2 x float> [ %5, %for.body ], [ zeroinitializer, %entry ] + br i1 false, label %for0, label %for.body +} + +define void @test9() { +; CHECK-LABEL: @test9( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4) +; CHECK-NEXT: br label [[FOR_BODY13:%.*]] +; CHECK: for.body13: +; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i1> [[TMP2]] to <8 x i32> +; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr null, align 4 +; CHECK-NEXT: br label [[FOR_BODY13]] +; +entry: + br label %for.body13 + +for.body13: ; preds = %for.body13, %entry + %vmovl.i111 = sext <4 x i16> zeroinitializer to <4 x i32> + %vmovl.i110 = sext <4 x i16> zeroinitializer to <4 x i32> + store <4 x i32> %vmovl.i111, ptr null, align 4 + %add.ptr29 = getelementptr i8, ptr null, i64 16 + store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4 + br label %for.body13 +} diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-missing-probe.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-missing-probe.prof new file mode 100644 index 0000000000000..cc50d6a2fad2b --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-missing-probe.prof @@ -0,0 +1,13 @@ +main:89650:0 + 1: 0 + 2: 16724 + 3: 16724 + 4: 14342 + 5: 15026 bar:15026 + 6: 1882 + 8: 16724 + 9: 0 + !CFGChecksum: 563091374530180 +bar:15026:15026 + 1: 15026 + !CFGChecksum: 4294967295 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-missing-probe.ll 
b/llvm/test/Transforms/SampleProfile/pseudo-probe-missing-probe.ll new file mode 100644 index 0000000000000..3d559f2fb0159 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-missing-probe.ll @@ -0,0 +1,243 @@ +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-missing-probe.prof -S | FileCheck %s + +; CHECK: br i1 %tobool.not.i, label %if.end.i, label %if.then.i, !dbg ![[#]], !prof ![[#PROF:]] + +; CHECK: [[#PROF]] = !{!"branch_weights", i32 918, i32 918} +; Verify the else branch is not set to a zero count +; CHECK-NOT: [[#PROF]] = !{!"branch_weights", i32 1698, i32 0} + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 0, align 4, !dbg !0 + +; Function Attrs: nofree noinline norecurse nounwind memory(readwrite, argmem: none) uwtable +define dso_local void @bar(i32 %i) local_unnamed_addr #0 !dbg !18 { +entry: + #dbg_value(i32 poison, !22, !DIExpression(), !23) + call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24 + %0 = load volatile i32, ptr @x, align 4, !dbg !24, !tbaa !25 + %add = add nsw i32 %0, 5, !dbg !24 + store volatile i32 %add, ptr @x, align 4, !dbg !24, !tbaa !25 + ret void, !dbg !29 +} + +; Function Attrs: nofree norecurse nounwind memory(readwrite, argmem: none) uwtable +define dso_local void @baz(i32 noundef %i) local_unnamed_addr #1 !dbg !30 { +entry: + #dbg_value(i32 %i, !32, !DIExpression(), !33) + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 1, i32 0, i64 -1), !dbg !34 + %rem = srem i32 %i, 100, !dbg !36 + %tobool.not = icmp eq i32 %rem, 0, !dbg !36 + br i1 %tobool.not, label %if.end, label %if.then, !dbg !37 + +if.then: ; preds = %entry + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 2, i32 0, i64 -1), !dbg !38 + %0 = load volatile i32, ptr @x, align 4, !dbg !38, !tbaa !25 + %inc = add nsw i32 %0, 1, !dbg !38 + store 
volatile i32 %inc, ptr @x, align 4, !dbg !38, !tbaa !25 + br label %if.end, !dbg !39 + +if.end: ; preds = %if.then, %entry + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 3, i32 0, i64 -1), !dbg !40 + %1 = load volatile i32, ptr @x, align 4, !dbg !40, !tbaa !25 + %add = add nsw i32 %1, 2, !dbg !40 + store volatile i32 %add, ptr @x, align 4, !dbg !40, !tbaa !25 + %2 = and i32 %i, 1, !dbg !41 + %tobool2.not = icmp eq i32 %2, 0, !dbg !41 + br i1 %tobool2.not, label %if.else, label %if.end11, !dbg !43 + +if.else: ; preds = %if.end + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 5, i32 0, i64 -1), !dbg !44 + %rem5 = srem i32 %i, 3, !dbg !46 + %tobool6.not = icmp eq i32 %rem5, 0, !dbg !46 + %spec.select = select i1 %tobool6.not, i32 -1, i32 2, !dbg !47 + br label %if.end11, !dbg !47 + +if.end11: ; preds = %if.else, %if.end + %.sink14 = phi i32 [ 1, %if.end ], [ %spec.select, %if.else ] + %3 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !25 + %add8 = add nsw i32 %3, %.sink14, !dbg !48 + store volatile i32 %add8, ptr @x, align 4, !dbg !48, !tbaa !25 + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 9, i32 0, i64 -1), !dbg !49 + ret void, !dbg !49 +} + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local noundef i32 @main() local_unnamed_addr #2 !dbg !50 { +entry: + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !55 + #dbg_value(i32 0, !54, !DIExpression(), !56) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !57 + br label %while.body, !dbg !58 + +while.body: ; preds = %entry, %if.end + %inc7 = phi i32 [ 1, %entry ], [ %inc, %if.end ] + %i.06 = phi i32 [ 0, %entry ], [ %inc7, %if.end ] + #dbg_value(i32 %i.06, !54, !DIExpression(), !56) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !59 + %rem = urem i32 %inc7, 10, !dbg !62 + %tobool.not = icmp eq i32 %rem, 0, !dbg !62 + br i1 %tobool.not, label %if.else, label 
%if.then, !dbg !63 + +if.then: ; preds = %while.body + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !64 + tail call void @bar(i32 poison), !dbg !65 + br label %if.end, !dbg !67 + +if.else: ; preds = %while.body + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !68 + #dbg_value(i32 %inc7, !32, !DIExpression(), !69) + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 1, i32 0, i64 -1), !dbg !72 + %rem.i4 = urem i32 %inc7, 100, !dbg !73 + %tobool.not.i = icmp eq i32 %rem.i4, 0, !dbg !73 + br i1 %tobool.not.i, label %if.end.i, label %if.then.i, !dbg !74 + +if.then.i: ; preds = %if.else + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 2, i32 0, i64 -1), !dbg !75 + %0 = load volatile i32, ptr @x, align 4, !dbg !75, !tbaa !25 + %inc.i = add nsw i32 %0, 1, !dbg !75 + store volatile i32 %inc.i, ptr @x, align 4, !dbg !75, !tbaa !25 + br label %if.end.i, !dbg !76 + +if.end.i: ; preds = %if.then.i, %if.else + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 3, i32 0, i64 -1), !dbg !77 + %1 = load volatile i32, ptr @x, align 4, !dbg !77, !tbaa !25 + %add.i = add nsw i32 %1, 2, !dbg !77 + store volatile i32 %add.i, ptr @x, align 4, !dbg !77, !tbaa !25 + %2 = and i32 %i.06, 1, !dbg !78 + %tobool2.not.i.not = icmp eq i32 %2, 0, !dbg !78 + br i1 %tobool2.not.i.not, label %baz.exit, label %if.else.i, !dbg !79 + +if.else.i: ; preds = %if.end.i + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 5, i32 0, i64 -1), !dbg !80 + %rem5.i5 = urem i32 %inc7, 3, !dbg !81 + %tobool6.not.i = icmp eq i32 %rem5.i5, 0, !dbg !81 + %spec.select.i = select i1 %tobool6.not.i, i32 -1, i32 2, !dbg !82 + br label %baz.exit, !dbg !82 + +baz.exit: ; preds = %if.end.i, %if.else.i + %.sink14.i = phi i32 [ 1, %if.end.i ], [ %spec.select.i, %if.else.i ] + %3 = load volatile i32, ptr @x, align 4, !dbg !83, !tbaa !25 + %add8.i = add nsw i32 %3, %.sink14.i, !dbg !83 + store volatile i32 %add8.i, ptr @x, align 4, 
!dbg !83, !tbaa !25 + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 9, i32 0, i64 -1), !dbg !84 + br label %if.end + +if.end: ; preds = %baz.exit, %if.then + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !58 + #dbg_value(i32 %inc7, !54, !DIExpression(), !56) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !57 + %inc = add nuw nsw i32 %inc7, 1, !dbg !57 + #dbg_value(i32 %inc, !54, !DIExpression(), !56) + %exitcond.not = icmp eq i32 %inc, 160000001, !dbg !85 + br i1 %exitcond.not, label %while.end, label %while.body, !dbg !58, !llvm.loop !86 + +while.end: ; preds = %if.end + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !89 + ret i32 0, !dbg !89 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3 + +attributes #0 = { nofree noinline norecurse nounwind memory(readwrite, argmem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nofree norecurse nounwind memory(readwrite, argmem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { nofree norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile"} +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} +!llvm.ident = !{!14} 
+!llvm.pseudo_probe_desc = !{!15, !16, !17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "/home", checksumkind: CSK_MD5, checksum: "b67c15e928f76c51702a59639dbebb4c") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 8, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{i32 7, !"uwtable", i32 2} +!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!14 = !{!"clang version 20.0.0"} +!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} +!16 = !{i64 7546896869197086323, i64 191430930410, !"baz"} +!17 = !{i64 -2624081020897602054, i64 563091374530180, !"main"} +!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{null, !6} +!21 = !{!22} +!22 = !DILocalVariable(name: "i", arg: 1, scope: !18, file: !3, line: 3, type: !6) +!23 = !DILocation(line: 0, scope: !18) +!24 = !DILocation(line: 4, column: 5, scope: !18) +!25 = !{!26, !26, i64 0} +!26 = !{!"int", !27, i64 0} +!27 = !{!"omnipotent char", !28, i64 0} +!28 = !{!"Simple C/C++ TBAA"} +!29 = !DILocation(line: 8, column: 1, scope: !18) +!30 = distinct !DISubprogram(name: "baz", scope: !3, file: !3, line: 10, type: !19, scopeLine: 10, flags: 
DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !31) +!31 = !{!32} +!32 = !DILocalVariable(name: "i", arg: 1, scope: !30, file: !3, line: 10, type: !6) +!33 = !DILocation(line: 0, scope: !30) +!34 = !DILocation(line: 11, column: 6, scope: !35) +!35 = distinct !DILexicalBlock(scope: !30, file: !3, line: 11, column: 6) +!36 = !DILocation(line: 11, column: 7, scope: !35) +!37 = !DILocation(line: 11, column: 6, scope: !30) +!38 = !DILocation(line: 12, column: 6, scope: !35) +!39 = !DILocation(line: 12, column: 5, scope: !35) +!40 = !DILocation(line: 14, column: 5, scope: !30) +!41 = !DILocation(line: 15, column: 9, scope: !42) +!42 = distinct !DILexicalBlock(scope: !30, file: !3, line: 15, column: 7) +!43 = !DILocation(line: 15, column: 7, scope: !30) +!44 = !DILocation(line: 17, column: 12, scope: !45) +!45 = distinct !DILexicalBlock(scope: !42, file: !3, line: 17, column: 12) +!46 = !DILocation(line: 17, column: 14, scope: !45) +!47 = !DILocation(line: 17, column: 12, scope: !42) +!48 = !DILocation(line: 0, scope: !42) +!49 = !DILocation(line: 21, column: 1, scope: !30) +!50 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 23, type: !51, scopeLine: 23, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !53) +!51 = !DISubroutineType(types: !52) +!52 = !{!6} +!53 = !{!54} +!54 = !DILocalVariable(name: "i", scope: !50, file: !3, line: 24, type: !6) +!55 = !DILocation(line: 24, column: 7, scope: !50) +!56 = !DILocation(line: 0, scope: !50) +!57 = !DILocation(line: 25, column: 11, scope: !50) +!58 = !DILocation(line: 25, column: 3, scope: !50) +!59 = !DILocation(line: 26, column: 8, scope: !60) +!60 = distinct !DILexicalBlock(scope: !61, file: !3, line: 26, column: 8) +!61 = distinct !DILexicalBlock(scope: !50, file: !3, line: 25, column: 30) +!62 = !DILocation(line: 26, column: 10, scope: !60) +!63 = !DILocation(line: 26, column: 
8, scope: !61) +!64 = !DILocation(line: 27, column: 10, scope: !60) +!65 = !DILocation(line: 27, column: 6, scope: !66) +!66 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 455082031) +!67 = !DILocation(line: 27, column: 6, scope: !60) +!68 = !DILocation(line: 29, column: 10, scope: !60) +!69 = !DILocation(line: 0, scope: !30, inlinedAt: !70) +!70 = distinct !DILocation(line: 29, column: 6, scope: !71) +!71 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 455082047) +!72 = !DILocation(line: 11, column: 6, scope: !35, inlinedAt: !70) +!73 = !DILocation(line: 11, column: 7, scope: !35, inlinedAt: !70) +!74 = !DILocation(line: 11, column: 6, scope: !30, inlinedAt: !70) +!75 = !DILocation(line: 12, column: 6, scope: !35, inlinedAt: !70) +!76 = !DILocation(line: 12, column: 5, scope: !35, inlinedAt: !70) +!77 = !DILocation(line: 14, column: 5, scope: !30, inlinedAt: !70) +!78 = !DILocation(line: 15, column: 9, scope: !42, inlinedAt: !70) +!79 = !DILocation(line: 15, column: 7, scope: !30, inlinedAt: !70) +!80 = !DILocation(line: 17, column: 12, scope: !45, inlinedAt: !70) +!81 = !DILocation(line: 17, column: 14, scope: !45, inlinedAt: !70) +!82 = !DILocation(line: 17, column: 12, scope: !42, inlinedAt: !70) +!83 = !DILocation(line: 0, scope: !42, inlinedAt: !70) +!84 = !DILocation(line: 21, column: 1, scope: !30, inlinedAt: !70) +!85 = !DILocation(line: 25, column: 14, scope: !50) +!86 = distinct !{!86, !58, !87, !88} +!87 = !DILocation(line: 30, column: 3, scope: !50) +!88 = !{!"llvm.loop.mustprogress"} +!89 = !DILocation(line: 31, column: 3, scope: !50) diff --git a/llvm/test/tools/llvm-dwarfdump/X86/DW_OP_implicit_pointer.yaml b/llvm/test/tools/llvm-dwarfdump/X86/DW_OP_implicit_pointer.yaml new file mode 100644 index 0000000000000..b6b2d1d0e183e --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/DW_OP_implicit_pointer.yaml @@ -0,0 +1,87 @@ +# Test that we can decode `DW_OP_implicit_pointer` (0xa0) +# RUN: yaml2obj %s | llvm-dwarfdump - | 
FileCheck %s + +# CHECK: DW_TAG_variable +# CHECK-NEXT: DW_AT_location (DW_OP_implicit_pointer 0x2a +4) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +DWARF: + debug_abbrev: + - Table: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Code: 0x00000003 + Tag: DW_TAG_formal_parameter + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_location + Form: DW_FORM_exprloc + - Code: 0x00000004 + Tag: DW_TAG_variable + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_location + Form: DW_FORM_exprloc + debug_info: + - Length: 52 + Version: 5 + UnitType: DW_UT_compile + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x000000000000000C + - Value: 0x0000000100000F50 + - Value: 0x0000000000000034 + - AbbrCode: 0x00000002 + Values: + - Value: 0x0000000100000F50 + - Value: 0x0000000000000034 + - Value: 0x0000000000000001 + BlockData: + - 0x56 + - AbbrCode: 0x00000003 + Values: + - Value: 0x0000000000000002 + BlockData: + - 0x91 + - 0x78 + - AbbrCode: 0x00000004 + Values: + - Value: 0x0000000000000006 + BlockData: + - 0xa0 # DW_OP_implicit_pointer + - 0x2a # Section offset of parameter in the previous entry + - 0x00 + - 0x00 + - 0x00 + - 0x04 # Pointer references location 4 bytes into value of previous entry + - AbbrCode: 0x00000000 + Values: + - AbbrCode: 0x00000000 + Values: +... 
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index aa2b4543927a7..dddbd837c1cbc 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -126,11 +126,7 @@ def AMDGPU_RawBufferLoadOp : DefaultValuedAttr:$boundsCheck, OptionalAttr:$indexOffset, Optional:$sgprOffset)>, - Results<(outs AnyTypeOf<[BF16, F16, F32, I32, I8, F8E5M2FNUZ, F8E4M3FNUZ, - VectorOfLengthAndType<[2, 4], [F32, I32]>, - VectorOfLengthAndType<[2, 4, 8], [F16, BF16]>, - VectorOfLengthAndType<[2, 4, 8, 16], - [I8, F8E5M2FNUZ, F8E4M3FNUZ]>]>:$value)> { + Results<(outs AnyType:$value)> { let summary = "Raw Buffer load, exposing GCN features"; let description = [{ @@ -176,11 +172,7 @@ def AMDGPU_RawBufferLoadOp : def AMDGPU_RawBufferStoreOp : AMDGPU_Op<"raw_buffer_store", [AllElementTypesMatch<["value", "memref"]>, AttrSizedOperandSegments]>, - Arguments<(ins AnyTypeOf<[BF16, F16, F32, I32, I8, F8E5M2FNUZ, F8E4M3FNUZ, - VectorOfLengthAndType<[2, 4], [F32, I32]>, - VectorOfLengthAndType<[2, 4, 8], [F16, BF16]>, - VectorOfLengthAndType<[2, 4, 8, 16], - [I8, F8E5M2FNUZ, F8E4M3FNUZ]>]>:$value, + Arguments<(ins AnyType:$value, Arg:$memref, Variadic:$indices, DefaultValuedAttr:$boundsCheck, diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index ed7b9ece4a464..896fdf1c899e3 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -289,7 +289,7 @@ class RewritePattern : public Pattern { using Pattern::Pattern; private: - /// Trait to check if T provides a `getOperationName` method. + /// Trait to check if T provides a `initialize` method. 
template using has_initialize = decltype(std::declval().initialize()); template diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index d164e87509796..9b61c4493994c 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -1049,8 +1049,8 @@ func.func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf3 // ----- -// CHECK-LABEL: @extract_element_0d -func.func @extract_element_0d(%a: vector) -> f32 { +// CHECK-LABEL: @extractelement_0d +func.func @extractelement_0d(%a: vector) -> f32 { // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: llvm.extractelement %{{.*}}[%[[C0]] : {{.*}}] : vector<1xf32> %1 = vector.extractelement %a[] : vector @@ -1059,31 +1059,54 @@ func.func @extract_element_0d(%a: vector) -> f32 { // ----- -func.func @extract_element(%arg0: vector<16xf32>) -> f32 { +func.func @extractelement(%arg0: vector<16xf32>) -> f32 { %0 = arith.constant 15 : i32 %1 = vector.extractelement %arg0[%0 : i32]: vector<16xf32> return %1 : f32 } -// CHECK-LABEL: @extract_element( +// CHECK-LABEL: @extractelement( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[c:.*]] = arith.constant 15 : i32 // CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : vector<16xf32> // CHECK: return %[[x]] : f32 +func.func @extractelement_scalable(%arg0: vector<[16]xf32>) -> f32 { + %0 = arith.constant 15 : i32 + %1 = vector.extractelement %arg0[%0 : i32]: vector<[16]xf32> + return %1 : f32 +} +// CHECK-LABEL: @extractelement_scalable( +// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>) +// CHECK: %[[c:.*]] = arith.constant 15 : i32 +// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : vector<[16]xf32> +// CHECK: return %[[x]] : f32 + // ----- -func.func @extract_element_index(%arg0: vector<16xf32>) -> f32 { +func.func @extractelement_index(%arg0: vector<16xf32>) -> f32 { %0 = arith.constant 15 
: index %1 = vector.extractelement %arg0[%0 : index]: vector<16xf32> return %1 : f32 } -// CHECK-LABEL: @extract_element_index( +// CHECK-LABEL: @extractelement_index( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[c:.*]] = arith.constant 15 : index // CHECK: %[[i:.*]] = builtin.unrealized_conversion_cast %[[c]] : index to i64 // CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[i]] : i64] : vector<16xf32> // CHECK: return %[[x]] : f32 +func.func @extractelement_index_scalable(%arg0: vector<[16]xf32>) -> f32 { + %0 = arith.constant 15 : index + %1 = vector.extractelement %arg0[%0 : index]: vector<[16]xf32> + return %1 : f32 +} +// CHECK-LABEL: @extractelement_index_scalable( +// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>) +// CHECK: %[[c:.*]] = arith.constant 15 : index +// CHECK: %[[i:.*]] = builtin.unrealized_conversion_cast %[[c]] : index to i64 +// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[i]] : i64] : vector<[16]xf32> +// CHECK: return %[[x]] : f32 + // ----- func.func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { @@ -1095,6 +1118,15 @@ func.func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { // CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> // CHECK: return {{.*}} : f32 +func.func @extract_element_from_vec_1d_scalable(%arg0: vector<[16]xf32>) -> f32 { + %0 = vector.extract %arg0[15]: f32 from vector<[16]xf32> + return %0 : f32 +} +// CHECK-LABEL: @extract_element_from_vec_1d_scalable +// CHECK: llvm.mlir.constant(15 : i64) : i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<[16]xf32> +// CHECK: return {{.*}} : f32 + // ----- func.func @extract_index_element_from_vec_1d(%arg0: vector<16xindex>) -> index { @@ -1109,6 +1141,18 @@ func.func @extract_index_element_from_vec_1d(%arg0: vector<16xindex>) -> index { // CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index // CHECK: return %[[T3]] : index +func.func @extract_index_element_from_vec_1d_scalable(%arg0: 
vector<[16]xindex>) -> index { + %0 = vector.extract %arg0[15]: index from vector<[16]xindex> + return %0 : index +} +// CHECK-LABEL: @extract_index_element_from_vec_1d_scalable( +// CHECK-SAME: %[[A:.*]]: vector<[16]xindex>) +// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<[16]xindex> to vector<[16]xi64> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(15 : i64) : i64 +// CHECK: %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<[16]xi64> +// CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index +// CHECK: return %[[T3]] : index + // ----- func.func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> { @@ -1119,6 +1163,14 @@ func.func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16x // CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<16xf32>>> // CHECK: return {{.*}} : vector<3x16xf32> +func.func @extract_vec_2d_from_vec_3d_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<3x[16]xf32> { + %0 = vector.extract %arg0[0]: vector<3x[16]xf32> from vector<4x3x[16]xf32> + return %0 : vector<3x[16]xf32> +} +// CHECK-LABEL: @extract_vec_2d_from_vec_3d_scalable +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<[16]xf32>>> +// CHECK: return {{.*}} : vector<3x[16]xf32> + // ----- func.func @extract_vec_1d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<16xf32> { @@ -1129,6 +1181,14 @@ func.func @extract_vec_1d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<16xf3 // CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>> // CHECK: return {{.*}} : vector<16xf32> +func.func @extract_vec_1d_from_vec_3d_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<[16]xf32> { + %0 = vector.extract %arg0[0, 0]: vector<[16]xf32> from vector<4x3x[16]xf32> + return %0 : vector<[16]xf32> +} +// CHECK-LABEL: @extract_vec_1d_from_vec_3d_scalable +// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x 
vector<[16]xf32>>> +// CHECK: return {{.*}} : vector<[16]xf32> + // ----- func.func @extract_element_from_vec_3d(%arg0: vector<4x3x16xf32>) -> f32 { @@ -1141,6 +1201,16 @@ func.func @extract_element_from_vec_3d(%arg0: vector<4x3x16xf32>) -> f32 { // CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> // CHECK: return {{.*}} : f32 +func.func @extract_element_from_vec_3d_scalable(%arg0: vector<4x3x[16]xf32>) -> f32 { + %0 = vector.extract %arg0[0, 0, 0]: f32 from vector<4x3x[16]xf32> + return %0 : f32 +} +// CHECK-LABEL: @extract_element_from_vec_3d_scalable +// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<[16]xf32>>> +// CHECK: llvm.mlir.constant(0 : i64) : i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<[16]xf32> +// CHECK: return {{.*}} : f32 + // ----- func.func @extract_element_with_value_1d(%arg0: vector<16xf32>, %arg1: index) -> f32 { @@ -1152,6 +1222,15 @@ func.func @extract_element_with_value_1d(%arg0: vector<16xf32>, %arg1: index) -> // CHECK: %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64 // CHECK: llvm.extractelement %[[VEC]][%[[UC]] : i64] : vector<16xf32> +func.func @extract_element_with_value_1d_scalable(%arg0: vector<[16]xf32>, %arg1: index) -> f32 { + %0 = vector.extract %arg0[%arg1]: f32 from vector<[16]xf32> + return %0 : f32 +} +// CHECK-LABEL: @extract_element_with_value_1d_scalable +// CHECK-SAME: %[[VEC:.+]]: vector<[16]xf32>, %[[INDEX:.+]]: index +// CHECK: %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64 +// CHECK: llvm.extractelement %[[VEC]][%[[UC]] : i64] : vector<[16]xf32> + // ----- func.func @extract_element_with_value_2d(%arg0: vector<1x16xf32>, %arg1: index) -> f32 { From 6a2ac00a8424a4402475e2b7972bfb01330c3bf8 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Tue, 13 Aug 2024 16:10:38 +0000 Subject: [PATCH 2/3] Only run instcombine in test case Created using spr 1.3.5-bogner --- .../Transforms/InstCombine/phi-int-users.ll 
| 416 ++++++++++++++++-- 1 file changed, 379 insertions(+), 37 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/phi-int-users.ll b/llvm/test/Transforms/InstCombine/phi-int-users.ll index ce81c5d7e3626..8a6bf44b884a2 100644 --- a/llvm/test/Transforms/InstCombine/phi-int-users.ll +++ b/llvm/test/Transforms/InstCombine/phi-int-users.ll @@ -1,14 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=arm64 -passes='inline,function(sroa,jump-threading,instcombine)' -S < %s | FileCheck %s +; RUN: opt -mtriple=arm64 -S < %s -passes=instcombine | FileCheck %s ; Verify that instcombine doesn't look at users of Constant in different ; functions for dominates() queries. -%struct.widget = type { %struct.baz, i8, [7 x i8] } -%struct.baz = type { %struct.snork } -%struct.snork = type { [8 x i8] } - -define void @spam(ptr %arg) { +define void @spam(ptr %arg) personality ptr null { ; CHECK-LABEL: define void @spam( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -49,11 +45,55 @@ define void @spam(ptr %arg) { ; CHECK-NEXT: ret void ; bb: - call void @barney(ptr %arg) + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i: ; preds = %bb + store i64 1, ptr %arg, align 8 + br label %barney.exit + +bb3.i: ; preds = %bb + %load.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i = icmp eq i32 %load.i.i, 0 + br i1 %icmp.i.i, label %bb2.i.i, label %bb3.i.i + +bb2.i.i: ; preds = %bb3.i + br label %bb1.i + +bb1.i: ; preds = %spam.exit.i, %bb2.i.i + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i: ; preds = %bb1.i + call void @zot.4() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb3.i.i.i, %bb1.i + %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] + %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i + 
+eggs.exit: ; No predecessors! + br label %barney.exit + +bb3.i.i: ; preds = %bb3.i + %load.i.i1 = load volatile i1, ptr null, align 1 + br i1 %load.i.i1, label %quux.exit, label %bb3.i.i2 + +bb3.i.i2: ; preds = %bb3.i.i + call void @snork() + unreachable + +quux.exit: ; preds = %bb3.i.i + store ptr null, ptr null, align 8 + br label %barney.exit + +barney.exit: ; preds = %quux.exit, %eggs.exit, %bb2.i ret void } -define ptr @zot(ptr %arg) { +define ptr @zot(ptr %arg) personality ptr null { ; CHECK-LABEL: define ptr @zot( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -63,7 +103,9 @@ define ptr @zot(ptr %arg) { ; CHECK-NEXT: ret ptr null ; bb: - %call = call ptr @ham.8(ptr %arg) + %load.i.i.i.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i.i.i.i, align 4 ret ptr null } @@ -86,7 +128,7 @@ define ptr @wombat.1(ptr %arg) { ; CHECK-NEXT: ret ptr null ; bb: - %call = call ptr @foo.9(ptr %arg) + store i64 1, ptr %arg, align 8 ret ptr null } @@ -103,7 +145,15 @@ define void @quux() personality ptr null { ; CHECK-NEXT: ret void ; bb: - call void @wobble() + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %wibble.exit, label %bb3.i + +bb3.i: ; preds = %bb + call void @snork() + unreachable + +wibble.exit: ; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -120,7 +170,15 @@ define void @wobble() personality ptr null { ; CHECK-NEXT: ret void ; bb: - call void @quux.3() + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i + +bb3.i.i: ; preds = %bb + call void @snork() + unreachable + +wobble.2.exit: ; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -141,12 +199,20 @@ define void @eggs() personality ptr null { ; CHECK-NEXT: br label %[[BB1]] ; bb: - %alloca = alloca %struct.widget, align 8 br label %bb1 -bb1: ; preds = %bb1, %bb - call void @spam(ptr %alloca) - %call = call ptr @zot(ptr %alloca) 
+bb1: ; preds = %spam.exit, %bb + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %spam.exit, label %bb3.i.i + +bb3.i.i: ; preds = %bb1 + call void @zot.4() + br label %spam.exit + +spam.exit: ; preds = %bb1, %bb3.i.i + %alloca.sroa.0.1 = phi i64 [ 0, %bb3.i.i ], [ 1, %bb1 ] + %0 = inttoptr i64 %alloca.sroa.0.1 to ptr + store i32 0, ptr %0, align 4 br label %bb1 } @@ -167,7 +233,22 @@ define void @wobble.2() personality ptr null { ; CHECK-NEXT: ret void ; bb: - call void @wibble(ptr null) + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i: ; preds = %bb + %inttoptr.i = inttoptr i64 0 to ptr + store ptr %inttoptr.i, ptr null, align 8 + br label %wibble.exit + +bb3.i: ; preds = %bb + call void @snork() + unreachable + +foo.exit: ; No predecessors! + br label %wibble.exit + +wibble.exit: ; preds = %bb2.i, %foo.exit ret void } @@ -188,7 +269,21 @@ define void @quux.3() personality ptr null { ; CHECK-NEXT: ret void ; bb: - call void @wobble.2() + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i: ; preds = %bb + store ptr null, ptr null, align 8 + br label %wibble.exit + +bb3.i: ; preds = %bb + call void @snork() + unreachable + +wombat.exit: ; No predecessors! 
+ br label %wibble.exit + +wibble.exit: ; preds = %bb2.i, %wombat.exit ret void } @@ -228,11 +323,50 @@ define void @zot.4() personality ptr null { ; CHECK-NEXT: ret void ; bb: - call void @blam() + %load.i = load volatile i32, ptr null, align 4 + %icmp.i = icmp eq i32 %load.i, 0 + br i1 %icmp.i, label %bb2.i, label %bb3.i + +bb2.i: ; preds = %bb + br label %bb1.i + +bb1.i: ; preds = %spam.exit.i, %bb2.i + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i: ; preds = %bb1.i + call void @zot.4() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb1.i, %bb3.i.i.i + %alloca.i.sroa.0.1 = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] + %0 = inttoptr i64 %alloca.i.sroa.0.1 to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i + +eggs.exit: ; No predecessors! + br label %blam.exit + +bb3.i: ; preds = %bb + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %bb2.i.i, label %bb3.i.i + +bb2.i.i: ; preds = %bb3.i + store ptr null, ptr null, align 8 + br label %blam.exit + +bb3.i.i: ; preds = %bb3.i + call void @snork() + unreachable + +wombat.exit: ; No predecessors! 
+ br label %blam.exit + +blam.exit: ; preds = %wombat.exit, %bb2.i.i, %eggs.exit ret void } -define void @blam() { +define void @blam() personality ptr null { ; CHECK-LABEL: define void @blam() personality ptr null { ; CHECK-NEXT: [[BB:.*:]] ; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr null, align 4 @@ -295,15 +429,72 @@ bb: %icmp = icmp eq i32 %load, 0 br i1 %icmp, label %bb2, label %bb3 -bb1: ; preds = %bb3, %bb2 +bb1: ; preds = %wobble.2.exit, %eggs.exit ret void bb2: ; preds = %bb - call void @eggs() + br label %bb1.i + +bb1.i: ; preds = %spam.exit, %bb2 + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %spam.exit, label %bb3.i.i + +bb3.i.i: ; preds = %bb1.i + %load.i.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i.i = icmp eq i32 %load.i.i.i, 0 + br i1 %icmp.i.i.i, label %bb2.i.i.i, label %bb3.i.i.i + +bb2.i.i.i: ; preds = %bb3.i.i + br label %bb1.i1 + +bb1.i1: ; preds = %spam.exit.i, %bb2.i.i.i + %load.i.i.i2 = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i2, label %spam.exit.i, label %bb3.i.i.i3 + +bb3.i.i.i3: ; preds = %bb1.i1 + call void @zot.4() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb3.i.i.i3, %bb1.i1 + %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i3 ], [ 1, %bb1.i1 ] + %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i1 + +eggs.exit4: ; No predecessors! + br label %spam.exit + +bb3.i.i.i: ; preds = %bb3.i.i + %load.i.i5 = load volatile i1, ptr null, align 1 + br i1 %load.i.i5, label %quux.exit, label %bb3.i.i6 + +bb3.i.i6: ; preds = %bb3.i.i.i + call void @snork() + unreachable + +quux.exit: ; preds = %bb3.i.i.i + store ptr null, ptr null, align 8 + br label %spam.exit + +spam.exit: ; preds = %eggs.exit4, %quux.exit, %bb1.i + %alloca.i.sroa.0.1 = phi i64 [ 1, %bb1.i ], [ 0, %quux.exit ], [ 0, %eggs.exit4 ] + %1 = inttoptr i64 %alloca.i.sroa.0.1 to ptr + store i32 0, ptr %1, align 4 + br label %bb1.i + +eggs.exit: ; No predecessors! 
br label %bb1 bb3: ; preds = %bb - call void @quux() + %load.i.i7 = load volatile i1, ptr null, align 1 + br i1 %load.i.i7, label %wobble.2.exit, label %bb3.i.i8 + +bb3.i.i8: ; preds = %bb3 + call void @snork() + unreachable + +wobble.2.exit: ; preds = %bb3 + store ptr null, ptr null, align 8 br label %bb1 } @@ -372,15 +563,75 @@ bb: %load = load volatile i1, ptr null, align 1 br i1 %load, label %bb2, label %bb3 -bb1: ; preds = %bb3, %bb2 +bb1: ; preds = %blam.exit, %bb2 ret void bb2: ; preds = %bb - %call = call ptr @wombat.1(ptr %arg) + store i64 1, ptr %arg, align 8 br label %bb1 bb3: ; preds = %bb + %load.i = load volatile i32, ptr null, align 4 + %icmp.i = icmp eq i32 %load.i, 0 + br i1 %icmp.i, label %bb2.i, label %bb3.i + +bb2.i: ; preds = %bb3 + br label %bb1.i.i + +bb1.i.i: ; preds = %spam.exit.i, %bb2.i + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i: ; preds = %bb1.i.i + %load.i.i.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i.i.i = icmp eq i32 %load.i.i.i.i, 0 + br i1 %icmp.i.i.i.i, label %bb2.i.i.i.i, label %bb3.i.i.i.i + +bb2.i.i.i.i: ; preds = %bb3.i.i.i + br label %bb1.i1.i + +bb1.i1.i: ; preds = %spam.exit.i.i, %bb2.i.i.i.i + %load.i.i.i2.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i2.i, label %spam.exit.i.i, label %bb3.i.i.i3.i + +bb3.i.i.i3.i: ; preds = %bb1.i1.i call void @zot.4() + br label %spam.exit.i.i + +spam.exit.i.i: ; preds = %bb3.i.i.i3.i, %bb1.i1.i + %alloca.sroa.0.1.i.i = phi i64 [ 0, %bb3.i.i.i3.i ], [ 1, %bb1.i1.i ] + %0 = inttoptr i64 %alloca.sroa.0.1.i.i to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i1.i + +bb3.i.i.i.i: ; preds = %bb3.i.i.i + %load.i.i5.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i5.i, label %quux.exit.i, label %bb3.i.i6.i + +bb3.i.i6.i: ; preds = %bb3.i.i.i.i + call void @snork() + unreachable + +quux.exit.i: ; preds = %bb3.i.i.i.i + store ptr null, ptr null, align 8 + br label %spam.exit.i + 
+spam.exit.i: ; preds = %quux.exit.i, %bb1.i.i + %alloca.i.sroa.0.1.i = phi i64 [ 1, %bb1.i.i ], [ 0, %quux.exit.i ] + %1 = inttoptr i64 %alloca.i.sroa.0.1.i to ptr + store i32 0, ptr %1, align 4 + br label %bb1.i.i + +bb3.i: ; preds = %bb3 + %load.i.i7.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i7.i, label %blam.exit, label %bb3.i.i8.i + +bb3.i.i8.i: ; preds = %bb3.i + call void @snork() + unreachable + +blam.exit: ; preds = %bb3.i + store ptr null, ptr null, align 8 br label %bb1 } @@ -388,7 +639,9 @@ define void @snork() personality ptr null { ; CHECK-LABEL: define void @snork() personality ptr null { ; CHECK-NEXT: [[BB:.*:]] ; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I]], label %[[BARNEY_EXIT:.*]], label %[[BB3_I:.*]] +; CHECK-NEXT: br i1 [[LOAD_I]], label %[[BB2_I:.*]], label %[[BB3_I:.*]] +; CHECK: [[BB2_I]]: +; CHECK-NEXT: br label %[[BARNEY_EXIT:.*]] ; CHECK: [[BB3_I]]: ; CHECK-NEXT: [[LOAD_I1:%.*]] = load volatile i32, ptr null, align 4 ; CHECK-NEXT: [[ICMP_I:%.*]] = icmp eq i32 [[LOAD_I1]], 0 @@ -433,16 +686,92 @@ define void @snork() personality ptr null { ; CHECK-NEXT: br label %[[BB1_I_I]] ; CHECK: [[BLAM_EXIT]]: ; CHECK-NEXT: [[LOAD_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I]], label %[[BARNEY_EXIT]], label %[[BB3_I_I:.*]] +; CHECK-NEXT: br i1 [[LOAD_I_I]], label %[[WOBBLE_2_EXIT:.*]], label %[[BB3_I_I:.*]] ; CHECK: [[BB3_I_I]]: ; CHECK-NEXT: call void @snork() ; CHECK-NEXT: unreachable +; CHECK: [[WOBBLE_2_EXIT]]: +; CHECK-NEXT: br label %[[BARNEY_EXIT]] ; CHECK: [[BARNEY_EXIT]]: ; CHECK-NEXT: store ptr poison, ptr null, align 8 ; CHECK-NEXT: ret void ; bb: - call void @barney(ptr null) + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i: ; preds = %bb + store i64 1, ptr null, align 8 + br label %barney.exit + +bb3.i: ; preds = %bb + %load.i1 = load volatile i32, ptr null, align 4 + %icmp.i = icmp eq 
i32 %load.i1, 0 + br i1 %icmp.i, label %bb2.i3, label %blam.exit + +bb2.i3: ; preds = %bb3.i + br label %bb1.i.i + +bb1.i.i: ; preds = %spam.exit.i, %bb2.i3 + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i: ; preds = %bb1.i.i + %load.i.i.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i.i.i = icmp eq i32 %load.i.i.i.i, 0 + br i1 %icmp.i.i.i.i, label %bb2.i.i.i.i, label %bb3.i.i.i.i + +bb2.i.i.i.i: ; preds = %bb3.i.i.i + br label %bb1.i + +bb1.i: ; preds = %spam.exit.i8, %bb2.i.i.i.i + %load.i.i.i6 = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i6, label %spam.exit.i8, label %bb3.i.i.i7 + +bb3.i.i.i7: ; preds = %bb1.i + call void @zot.4() + br label %spam.exit.i8 + +spam.exit.i8: ; preds = %bb1.i, %bb3.i.i.i7 + %alloca.i.sroa.0.1 = phi i64 [ 0, %bb3.i.i.i7 ], [ 1, %bb1.i ] + %0 = inttoptr i64 %alloca.i.sroa.0.1 to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i + +eggs.exit: ; No predecessors! + br label %spam.exit.i + +bb3.i.i.i.i: ; preds = %bb3.i.i.i + %load.i.i4 = load volatile i1, ptr null, align 1 + br i1 %load.i.i4, label %quux.exit, label %bb3.i.i5 + +bb3.i.i5: ; preds = %bb3.i.i.i.i + call void @snork() + unreachable + +quux.exit: ; preds = %bb3.i.i.i.i + store ptr null, ptr null, align 8 + br label %spam.exit.i + +spam.exit.i: ; preds = %eggs.exit, %quux.exit, %bb1.i.i + %alloca.i.i.sroa.0.1 = phi i64 [ 1, %bb1.i.i ], [ 0, %quux.exit ], [ 0, %eggs.exit ] + %1 = inttoptr i64 %alloca.i.i.sroa.0.1 to ptr + store i32 0, ptr %1, align 4 + br label %bb1.i.i + +blam.exit: ; preds = %bb3.i + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i + +bb3.i.i: ; preds = %blam.exit + call void @snork() + unreachable + +wobble.2.exit: ; preds = %blam.exit + store ptr null, ptr null, align 8 + br label %barney.exit + +barney.exit: ; preds = %bb2.i, %wobble.2.exit ret void } @@ -467,17 +796,19 @@ bb: %load = load volatile i1, ptr 
null, align 1 br i1 %load, label %bb2, label %bb3 -bb1: ; preds = %bb3, %bb2 +bb1: ; preds = %wombat.exit, %bb2 ret void bb2: ; preds = %bb - %call = call i64 @zot.5(i64 0) - %inttoptr = inttoptr i64 %call to ptr + %inttoptr = inttoptr i64 0 to ptr store ptr %inttoptr, ptr %arg, align 8 br label %bb1 bb3: ; preds = %bb - %call4 = call i64 @foo() + call void @snork() + unreachable + +wombat.exit: ; No predecessors! br label %bb1 } @@ -491,7 +822,7 @@ bb: ret i64 %arg } -define i64 @foo() { +define i64 @foo() personality ptr null { ; CHECK-LABEL: define i64 @foo() personality ptr null { ; CHECK-NEXT: [[BB:.*:]] ; CHECK-NEXT: call void @snork() @@ -500,7 +831,10 @@ define i64 @foo() { ; CHECK-NEXT: ret i64 0 ; bb: - call void @wombat() + call void @snork() + unreachable + +wombat.exit: ; No predecessors! ret i64 0 } @@ -514,7 +848,9 @@ define ptr @ham(ptr %arg) { ; CHECK-NEXT: ret ptr null ; bb: - %call = call ptr @foo.7(ptr %arg) + %load.i.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i.i, align 4 ret ptr null } @@ -544,7 +880,9 @@ define ptr @foo.7(ptr %arg) { ; CHECK-NEXT: ret ptr null ; bb: - call void @quux.6(ptr %arg) + %load.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i, align 4 ret ptr null } @@ -558,7 +896,9 @@ define ptr @ham.8(ptr %arg) personality ptr null { ; CHECK-NEXT: ret ptr null ; bb: - %call = call ptr @ham(ptr %arg) + %load.i.i.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i.i.i, align 4 ret ptr null } @@ -574,3 +914,5 @@ bb: ret ptr null } + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) From b21d8028d4ea9f3107d6dade75cc0df416207dc5 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Tue, 13 Aug 2024 17:31:39 +0000 Subject: [PATCH 3/3] Reduce test case Created using spr 1.3.5-bogner --- .../Transforms/InstCombine/phi-int-users.ll | 912 +----------------- 1 file changed, 19 insertions(+), 
893 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/phi-int-users.ll b/llvm/test/Transforms/InstCombine/phi-int-users.ll index 8a6bf44b884a2..6c98cc8a1c900 100644 --- a/llvm/test/Transforms/InstCombine/phi-int-users.ll +++ b/llvm/test/Transforms/InstCombine/phi-int-users.ll @@ -1,918 +1,44 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=arm64 -S < %s -passes=instcombine | FileCheck %s +; RUN: opt -S < %s -passes=instcombine | FileCheck %s ; Verify that instcombine doesn't look at users of Constant in different ; functions for dominates() queries. -define void @spam(ptr %arg) personality ptr null { -; CHECK-LABEL: define void @spam( -; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I]], label %[[BB2_I:.*]], label %[[BB3_I:.*]] -; CHECK: [[BB2_I]]: -; CHECK-NEXT: store i64 1, ptr [[ARG]], align 8 -; CHECK-NEXT: br label %[[BARNEY_EXIT:.*]] -; CHECK: [[BB3_I]]: -; CHECK-NEXT: [[LOAD_I_I:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP_I_I:%.*]] = icmp eq i32 [[LOAD_I_I]], 0 -; CHECK-NEXT: br i1 [[ICMP_I_I]], label %[[BB2_I_I:.*]], label %[[BB3_I_I:.*]] -; CHECK: [[BB2_I_I]]: -; CHECK-NEXT: br label %[[BB1_I:.*]] -; CHECK: [[BB1_I]]: -; CHECK-NEXT: [[LOAD_I_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I_I]], label %[[SPAM_EXIT_I:.*]], label %[[BB3_I_I_I:.*]] -; CHECK: [[BB3_I_I_I]]: -; CHECK-NEXT: call void @zot.4() -; CHECK-NEXT: br label %[[SPAM_EXIT_I]] -; CHECK: [[SPAM_EXIT_I]]: -; CHECK-NEXT: [[ALLOCA_SROA_0_1_I:%.*]] = phi i64 [ 0, %[[BB3_I_I_I]] ], [ 1, %[[BB1_I]] ] -; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ALLOCA_SROA_0_1_I]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4 -; CHECK-NEXT: br label %[[BB1_I]] -; CHECK: [[EGGS_EXIT:.*:]] -; CHECK-NEXT: br label %[[BARNEY_EXIT]] -; CHECK: 
[[BB3_I_I]]: -; CHECK-NEXT: [[LOAD_I_I1:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I1]], label %[[QUUX_EXIT:.*]], label %[[BB3_I_I2:.*]] -; CHECK: [[BB3_I_I2]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[QUUX_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[BARNEY_EXIT]] -; CHECK: [[BARNEY_EXIT]]: -; CHECK-NEXT: ret void -; -bb: - %load.i = load volatile i1, ptr null, align 1 - br i1 %load.i, label %bb2.i, label %bb3.i - -bb2.i: ; preds = %bb - store i64 1, ptr %arg, align 8 - br label %barney.exit - -bb3.i: ; preds = %bb - %load.i.i = load volatile i32, ptr null, align 4 - %icmp.i.i = icmp eq i32 %load.i.i, 0 - br i1 %icmp.i.i, label %bb2.i.i, label %bb3.i.i - -bb2.i.i: ; preds = %bb3.i - br label %bb1.i - -bb1.i: ; preds = %spam.exit.i, %bb2.i.i - %load.i.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i - -bb3.i.i.i: ; preds = %bb1.i - call void @zot.4() - br label %spam.exit.i - -spam.exit.i: ; preds = %bb3.i.i.i, %bb1.i - %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] - %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr - store i32 0, ptr %0, align 4 - br label %bb1.i - -eggs.exit: ; No predecessors! 
- br label %barney.exit - -bb3.i.i: ; preds = %bb3.i - %load.i.i1 = load volatile i1, ptr null, align 1 - br i1 %load.i.i1, label %quux.exit, label %bb3.i.i2 - -bb3.i.i2: ; preds = %bb3.i.i - call void @snork() - unreachable - -quux.exit: ; preds = %bb3.i.i - store ptr null, ptr null, align 8 - br label %barney.exit - -barney.exit: ; preds = %quux.exit, %eggs.exit, %bb2.i - ret void -} - -define ptr @zot(ptr %arg) personality ptr null { -; CHECK-LABEL: define ptr @zot( -; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I_I_I_I:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: store ptr null, ptr [[ARG]], align 8 -; CHECK-NEXT: store i32 0, ptr [[LOAD_I_I_I_I]], align 4 -; CHECK-NEXT: ret ptr null -; -bb: - %load.i.i.i.i = load ptr, ptr %arg, align 8 - store ptr null, ptr %arg, align 8 - store i32 0, ptr %load.i.i.i.i, align 4 - ret ptr null -} - -define void @wombat() personality ptr null { -; CHECK-LABEL: define void @wombat() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; -bb: - call void @snork() - unreachable -} - -define ptr @wombat.1(ptr %arg) { -; CHECK-LABEL: define ptr @wombat.1( -; CHECK-SAME: ptr [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: store i64 1, ptr [[ARG]], align 8 -; CHECK-NEXT: ret ptr null -; -bb: - store i64 1, ptr %arg, align 8 - ret ptr null -} - -define void @quux() personality ptr null { -; CHECK-LABEL: define void @quux() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I]], label %[[WIBBLE_EXIT:.*]], label %[[BB3_I:.*]] -; CHECK: [[BB3_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WIBBLE_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: ret void -; -bb: - %load.i = load volatile i1, ptr null, align 1 - br i1 %load.i, label %wibble.exit, label %bb3.i - 
-bb3.i: ; preds = %bb - call void @snork() - unreachable - -wibble.exit: ; preds = %bb - store ptr null, ptr null, align 8 - ret void -} - -define void @wobble() personality ptr null { -; CHECK-LABEL: define void @wobble() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I]], label %[[WOBBLE_2_EXIT:.*]], label %[[BB3_I_I:.*]] -; CHECK: [[BB3_I_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WOBBLE_2_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: ret void -; -bb: - %load.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i - -bb3.i.i: ; preds = %bb - call void @snork() - unreachable - -wobble.2.exit: ; preds = %bb - store ptr null, ptr null, align 8 - ret void -} - -define void @eggs() personality ptr null { -; CHECK-LABEL: define void @eggs() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] +define void @f1(i1 %a) { +; CHECK-LABEL: define void @f1( +; CHECK-SAME: i1 [[A:%.*]]) { ; CHECK-NEXT: br label %[[BB1:.*]] ; CHECK: [[BB1]]: -; CHECK-NEXT: [[LOAD_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I]], label %[[SPAM_EXIT:.*]], label %[[BB3_I_I:.*]] -; CHECK: [[BB3_I_I]]: -; CHECK-NEXT: call void @zot.4() -; CHECK-NEXT: br label %[[SPAM_EXIT]] -; CHECK: [[SPAM_EXIT]]: -; CHECK-NEXT: [[ALLOCA_SROA_0_1:%.*]] = phi i64 [ 0, %[[BB3_I_I]] ], [ 1, %[[BB1]] ] -; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ALLOCA_SROA_0_1]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4 -; CHECK-NEXT: br label %[[BB1]] -; -bb: - br label %bb1 - -bb1: ; preds = %spam.exit, %bb - %load.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i, label %spam.exit, label %bb3.i.i - -bb3.i.i: ; preds = %bb1 - call void @zot.4() - br label %spam.exit - -spam.exit: ; preds = %bb1, %bb3.i.i - %alloca.sroa.0.1 = phi i64 [ 0, %bb3.i.i ], [ 1, %bb1 ] - %0 = 
inttoptr i64 %alloca.sroa.0.1 to ptr - store i32 0, ptr %0, align 4 - br label %bb1 -} - -define void @wobble.2() personality ptr null { -; CHECK-LABEL: define void @wobble.2() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I]], label %[[BB2_I:.*]], label %[[BB3_I:.*]] -; CHECK: [[BB2_I]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[WIBBLE_EXIT:.*]] -; CHECK: [[BB3_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[FOO_EXIT:.*:]] -; CHECK-NEXT: br label %[[WIBBLE_EXIT]] -; CHECK: [[WIBBLE_EXIT]]: -; CHECK-NEXT: ret void -; -bb: - %load.i = load volatile i1, ptr null, align 1 - br i1 %load.i, label %bb2.i, label %bb3.i - -bb2.i: ; preds = %bb - %inttoptr.i = inttoptr i64 0 to ptr - store ptr %inttoptr.i, ptr null, align 8 - br label %wibble.exit - -bb3.i: ; preds = %bb - call void @snork() - unreachable - -foo.exit: ; No predecessors! - br label %wibble.exit - -wibble.exit: ; preds = %bb2.i, %foo.exit - ret void -} - -define void @quux.3() personality ptr null { -; CHECK-LABEL: define void @quux.3() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I]], label %[[BB2_I:.*]], label %[[BB3_I:.*]] -; CHECK: [[BB2_I]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[WIBBLE_EXIT:.*]] -; CHECK: [[BB3_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WOMBAT_EXIT:.*:]] -; CHECK-NEXT: br label %[[WIBBLE_EXIT]] -; CHECK: [[WIBBLE_EXIT]]: -; CHECK-NEXT: ret void -; -bb: - %load.i = load volatile i1, ptr null, align 1 - br i1 %load.i, label %bb2.i, label %bb3.i - -bb2.i: ; preds = %bb - store ptr null, ptr null, align 8 - br label %wibble.exit - -bb3.i: ; preds = %bb - call void @snork() - unreachable - -wombat.exit: ; No predecessors! 
- br label %wibble.exit - -wibble.exit: ; preds = %bb2.i, %wombat.exit - ret void -} - -define void @zot.4() personality ptr null { -; CHECK-LABEL: define void @zot.4() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP_I:%.*]] = icmp eq i32 [[LOAD_I]], 0 -; CHECK-NEXT: br i1 [[ICMP_I]], label %[[BB2_I:.*]], label %[[BB3_I:.*]] -; CHECK: [[BB2_I]]: -; CHECK-NEXT: br label %[[BB1_I:.*]] -; CHECK: [[BB1_I]]: -; CHECK-NEXT: [[LOAD_I_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I_I]], label %[[SPAM_EXIT_I:.*]], label %[[BB3_I_I_I:.*]] -; CHECK: [[BB3_I_I_I]]: -; CHECK-NEXT: call void @zot.4() -; CHECK-NEXT: br label %[[SPAM_EXIT_I]] -; CHECK: [[SPAM_EXIT_I]]: -; CHECK-NEXT: [[ALLOCA_I_SROA_0_1:%.*]] = phi i64 [ 0, %[[BB3_I_I_I]] ], [ 1, %[[BB1_I]] ] -; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ALLOCA_I_SROA_0_1]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4 -; CHECK-NEXT: br label %[[BB1_I]] -; CHECK: [[EGGS_EXIT:.*:]] -; CHECK-NEXT: br label %[[BLAM_EXIT:.*]] -; CHECK: [[BB3_I]]: -; CHECK-NEXT: [[LOAD_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I]], label %[[BB2_I_I:.*]], label %[[BB3_I_I:.*]] -; CHECK: [[BB2_I_I]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[BLAM_EXIT]] -; CHECK: [[BB3_I_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WOMBAT_EXIT:.*:]] -; CHECK-NEXT: br label %[[BLAM_EXIT]] -; CHECK: [[BLAM_EXIT]]: -; CHECK-NEXT: ret void -; -bb: - %load.i = load volatile i32, ptr null, align 4 - %icmp.i = icmp eq i32 %load.i, 0 - br i1 %icmp.i, label %bb2.i, label %bb3.i - -bb2.i: ; preds = %bb - br label %bb1.i - -bb1.i: ; preds = %spam.exit.i, %bb2.i - %load.i.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i - -bb3.i.i.i: ; preds = %bb1.i - call void @zot.4() - br label %spam.exit.i - 
-spam.exit.i: ; preds = %bb1.i, %bb3.i.i.i - %alloca.i.sroa.0.1 = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] - %0 = inttoptr i64 %alloca.i.sroa.0.1 to ptr - store i32 0, ptr %0, align 4 - br label %bb1.i - -eggs.exit: ; No predecessors! - br label %blam.exit - -bb3.i: ; preds = %bb - %load.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i, label %bb2.i.i, label %bb3.i.i - -bb2.i.i: ; preds = %bb3.i - store ptr null, ptr null, align 8 - br label %blam.exit - -bb3.i.i: ; preds = %bb3.i - call void @snork() - unreachable - -wombat.exit: ; No predecessors! - br label %blam.exit - -blam.exit: ; preds = %wombat.exit, %bb2.i.i, %eggs.exit - ret void -} - -define void @blam() personality ptr null { -; CHECK-LABEL: define void @blam() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[LOAD]], 0 -; CHECK-NEXT: br i1 [[ICMP]], label %[[BB2:.*]], label %[[BB3:.*]] -; CHECK: [[BB1:.*]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br i1 [[A]], label %[[BB3:.*]], label %[[BB2:.*]] ; CHECK: [[BB2]]: -; CHECK-NEXT: br label %[[BB1_I:.*]] -; CHECK: [[BB1_I]]: -; CHECK-NEXT: [[LOAD_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I]], label %[[SPAM_EXIT:.*]], label %[[BB3_I_I:.*]] -; CHECK: [[BB3_I_I]]: -; CHECK-NEXT: [[LOAD_I_I_I:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP_I_I_I:%.*]] = icmp eq i32 [[LOAD_I_I_I]], 0 -; CHECK-NEXT: br i1 [[ICMP_I_I_I]], label %[[BB2_I_I_I:.*]], label %[[BB3_I_I_I:.*]] -; CHECK: [[BB2_I_I_I]]: -; CHECK-NEXT: br label %[[BB1_I1:.*]] -; CHECK: [[BB1_I1]]: -; CHECK-NEXT: [[LOAD_I_I_I2:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I_I2]], label %[[SPAM_EXIT_I:.*]], label %[[BB3_I_I_I3:.*]] -; CHECK: [[BB3_I_I_I3]]: -; CHECK-NEXT: call void @zot.4() -; CHECK-NEXT: br label %[[SPAM_EXIT_I]] -; CHECK: [[SPAM_EXIT_I]]: -; CHECK-NEXT: [[ALLOCA_SROA_0_1_I:%.*]] = phi i64 [ 
0, %[[BB3_I_I_I3]] ], [ 1, %[[BB1_I1]] ] -; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ALLOCA_SROA_0_1_I]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4 -; CHECK-NEXT: br label %[[BB1_I1]] -; CHECK: [[EGGS_EXIT4:.*]]: -; CHECK-NEXT: br label %[[SPAM_EXIT]] -; CHECK: [[BB3_I_I_I]]: -; CHECK-NEXT: [[LOAD_I_I5:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I5]], label %[[QUUX_EXIT:.*]], label %[[BB3_I_I6:.*]] -; CHECK: [[BB3_I_I6]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[QUUX_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[SPAM_EXIT]] -; CHECK: [[SPAM_EXIT]]: -; CHECK-NEXT: [[ALLOCA_I_SROA_0_1:%.*]] = phi i64 [ 1, %[[BB1_I]] ], [ 0, %[[QUUX_EXIT]] ], [ 0, %[[EGGS_EXIT4]] ] -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[ALLOCA_I_SROA_0_1]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4 -; CHECK-NEXT: br label %[[BB1_I]] -; CHECK: [[EGGS_EXIT:.*:]] -; CHECK-NEXT: br label %[[BB1]] +; CHECK-NEXT: br label %[[BB3]] ; CHECK: [[BB3]]: -; CHECK-NEXT: [[LOAD_I_I7:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I7]], label %[[WOBBLE_2_EXIT:.*]], label %[[BB3_I_I8:.*]] -; CHECK: [[BB3_I_I8]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WOBBLE_2_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, %[[BB2]] ], [ 1, %[[BB1]] ] +; CHECK-NEXT: [[INTTOPTR:%.*]] = inttoptr i64 [[PHI]] to ptr +; CHECK-NEXT: store i32 0, ptr [[INTTOPTR]], align 4 ; CHECK-NEXT: br label %[[BB1]] ; -bb: - %load = load volatile i32, ptr null, align 4 - %icmp = icmp eq i32 %load, 0 - br i1 %icmp, label %bb2, label %bb3 - -bb1: ; preds = %wobble.2.exit, %eggs.exit - ret void - -bb2: ; preds = %bb - br label %bb1.i - -bb1.i: ; preds = %spam.exit, %bb2 - %load.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i, label %spam.exit, label %bb3.i.i - -bb3.i.i: ; preds = %bb1.i - %load.i.i.i 
= load volatile i32, ptr null, align 4 - %icmp.i.i.i = icmp eq i32 %load.i.i.i, 0 - br i1 %icmp.i.i.i, label %bb2.i.i.i, label %bb3.i.i.i - -bb2.i.i.i: ; preds = %bb3.i.i - br label %bb1.i1 - -bb1.i1: ; preds = %spam.exit.i, %bb2.i.i.i - %load.i.i.i2 = load volatile i1, ptr null, align 1 - br i1 %load.i.i.i2, label %spam.exit.i, label %bb3.i.i.i3 - -bb3.i.i.i3: ; preds = %bb1.i1 - call void @zot.4() - br label %spam.exit.i - -spam.exit.i: ; preds = %bb3.i.i.i3, %bb1.i1 - %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i3 ], [ 1, %bb1.i1 ] - %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr - store i32 0, ptr %0, align 4 - br label %bb1.i1 - -eggs.exit4: ; No predecessors! - br label %spam.exit - -bb3.i.i.i: ; preds = %bb3.i.i - %load.i.i5 = load volatile i1, ptr null, align 1 - br i1 %load.i.i5, label %quux.exit, label %bb3.i.i6 - -bb3.i.i6: ; preds = %bb3.i.i.i - call void @snork() - unreachable - -quux.exit: ; preds = %bb3.i.i.i - store ptr null, ptr null, align 8 - br label %spam.exit - -spam.exit: ; preds = %eggs.exit4, %quux.exit, %bb1.i - %alloca.i.sroa.0.1 = phi i64 [ 1, %bb1.i ], [ 0, %quux.exit ], [ 0, %eggs.exit4 ] - %1 = inttoptr i64 %alloca.i.sroa.0.1 to ptr - store i32 0, ptr %1, align 4 - br label %bb1.i - -eggs.exit: ; No predecessors! 
- br label %bb1 - -bb3: ; preds = %bb - %load.i.i7 = load volatile i1, ptr null, align 1 - br i1 %load.i.i7, label %wobble.2.exit, label %bb3.i.i8 - -bb3.i.i8: ; preds = %bb3 - call void @snork() - unreachable - -wobble.2.exit: ; preds = %bb3 - store ptr null, ptr null, align 8 - br label %bb1 -} - -define void @barney(ptr %arg) personality ptr null { -; CHECK-LABEL: define void @barney( -; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD]], label %[[BB2:.*]], label %[[BB3:.*]] -; CHECK: [[BB1:.*]]: -; CHECK-NEXT: ret void -; CHECK: [[BB2]]: -; CHECK-NEXT: store i64 1, ptr [[ARG]], align 8 -; CHECK-NEXT: br label %[[BB1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP_I:%.*]] = icmp eq i32 [[LOAD_I]], 0 -; CHECK-NEXT: br i1 [[ICMP_I]], label %[[BB2_I:.*]], label %[[BB3_I:.*]] -; CHECK: [[BB2_I]]: -; CHECK-NEXT: br label %[[BB1_I_I:.*]] -; CHECK: [[BB1_I_I]]: -; CHECK-NEXT: [[LOAD_I_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I_I]], label %[[SPAM_EXIT_I:.*]], label %[[BB3_I_I_I:.*]] -; CHECK: [[BB3_I_I_I]]: -; CHECK-NEXT: [[LOAD_I_I_I_I:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP_I_I_I_I:%.*]] = icmp eq i32 [[LOAD_I_I_I_I]], 0 -; CHECK-NEXT: br i1 [[ICMP_I_I_I_I]], label %[[BB2_I_I_I_I:.*]], label %[[BB3_I_I_I_I:.*]] -; CHECK: [[BB2_I_I_I_I]]: -; CHECK-NEXT: br label %[[BB1_I1_I:.*]] -; CHECK: [[BB1_I1_I]]: -; CHECK-NEXT: [[LOAD_I_I_I2_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I_I2_I]], label %[[SPAM_EXIT_I_I:.*]], label %[[BB3_I_I_I3_I:.*]] -; CHECK: [[BB3_I_I_I3_I]]: -; CHECK-NEXT: call void @zot.4() -; CHECK-NEXT: br label %[[SPAM_EXIT_I_I]] -; CHECK: [[SPAM_EXIT_I_I]]: -; CHECK-NEXT: [[ALLOCA_SROA_0_1_I_I:%.*]] = phi i64 [ 0, %[[BB3_I_I_I3_I]] ], [ 1, %[[BB1_I1_I]] ] -; CHECK-NEXT: 
[[TMP0:%.*]] = inttoptr i64 [[ALLOCA_SROA_0_1_I_I]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4 -; CHECK-NEXT: br label %[[BB1_I1_I]] -; CHECK: [[BB3_I_I_I_I]]: -; CHECK-NEXT: [[LOAD_I_I5_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I5_I]], label %[[QUUX_EXIT_I:.*]], label %[[BB3_I_I6_I:.*]] -; CHECK: [[BB3_I_I6_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[QUUX_EXIT_I]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[SPAM_EXIT_I]] -; CHECK: [[SPAM_EXIT_I]]: -; CHECK-NEXT: [[ALLOCA_I_SROA_0_1_I:%.*]] = phi i64 [ 1, %[[BB1_I_I]] ], [ 0, %[[QUUX_EXIT_I]] ] -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[ALLOCA_I_SROA_0_1_I]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4 -; CHECK-NEXT: br label %[[BB1_I_I]] -; CHECK: [[BB3_I]]: -; CHECK-NEXT: [[LOAD_I_I7_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I7_I]], label %[[BLAM_EXIT:.*]], label %[[BB3_I_I8_I:.*]] -; CHECK: [[BB3_I_I8_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[BLAM_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[BB1]] -; -bb: - %load = load volatile i1, ptr null, align 1 - br i1 %load, label %bb2, label %bb3 - -bb1: ; preds = %blam.exit, %bb2 - ret void - -bb2: ; preds = %bb - store i64 1, ptr %arg, align 8 br label %bb1 -bb3: ; preds = %bb - %load.i = load volatile i32, ptr null, align 4 - %icmp.i = icmp eq i32 %load.i, 0 - br i1 %icmp.i, label %bb2.i, label %bb3.i - -bb2.i: ; preds = %bb3 - br label %bb1.i.i - -bb1.i.i: ; preds = %spam.exit.i, %bb2.i - %load.i.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i - -bb3.i.i.i: ; preds = %bb1.i.i - %load.i.i.i.i = load volatile i32, ptr null, align 4 - %icmp.i.i.i.i = icmp eq i32 %load.i.i.i.i, 0 - br i1 %icmp.i.i.i.i, label %bb2.i.i.i.i, label %bb3.i.i.i.i +bb1: + br i1 %a, label %bb3, label %bb2 
-bb2.i.i.i.i: ; preds = %bb3.i.i.i - br label %bb1.i1.i +bb2: + br label %bb3 -bb1.i1.i: ; preds = %spam.exit.i.i, %bb2.i.i.i.i - %load.i.i.i2.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i.i2.i, label %spam.exit.i.i, label %bb3.i.i.i3.i - -bb3.i.i.i3.i: ; preds = %bb1.i1.i - call void @zot.4() - br label %spam.exit.i.i - -spam.exit.i.i: ; preds = %bb3.i.i.i3.i, %bb1.i1.i - %alloca.sroa.0.1.i.i = phi i64 [ 0, %bb3.i.i.i3.i ], [ 1, %bb1.i1.i ] - %0 = inttoptr i64 %alloca.sroa.0.1.i.i to ptr - store i32 0, ptr %0, align 4 - br label %bb1.i1.i - -bb3.i.i.i.i: ; preds = %bb3.i.i.i - %load.i.i5.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i5.i, label %quux.exit.i, label %bb3.i.i6.i - -bb3.i.i6.i: ; preds = %bb3.i.i.i.i - call void @snork() - unreachable - -quux.exit.i: ; preds = %bb3.i.i.i.i - store ptr null, ptr null, align 8 - br label %spam.exit.i - -spam.exit.i: ; preds = %quux.exit.i, %bb1.i.i - %alloca.i.sroa.0.1.i = phi i64 [ 1, %bb1.i.i ], [ 0, %quux.exit.i ] - %1 = inttoptr i64 %alloca.i.sroa.0.1.i to ptr - store i32 0, ptr %1, align 4 - br label %bb1.i.i - -bb3.i: ; preds = %bb3 - %load.i.i7.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i7.i, label %blam.exit, label %bb3.i.i8.i - -bb3.i.i8.i: ; preds = %bb3.i - call void @snork() - unreachable - -blam.exit: ; preds = %bb3.i - store ptr null, ptr null, align 8 +bb3: + %phi = phi i64 [ 0, %bb2 ], [ 1, %bb1 ] + %inttoptr = inttoptr i64 %phi to ptr + store i32 0, ptr %inttoptr, align 4 br label %bb1 } -define void @snork() personality ptr null { -; CHECK-LABEL: define void @snork() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I]], label %[[BB2_I:.*]], label %[[BB3_I:.*]] -; CHECK: [[BB2_I]]: -; CHECK-NEXT: br label %[[BARNEY_EXIT:.*]] -; CHECK: [[BB3_I]]: -; CHECK-NEXT: [[LOAD_I1:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP_I:%.*]] = icmp eq i32 [[LOAD_I1]], 0 -; 
CHECK-NEXT: br i1 [[ICMP_I]], label %[[BB2_I3:.*]], label %[[BLAM_EXIT:.*]] -; CHECK: [[BB2_I3]]: -; CHECK-NEXT: br label %[[BB1_I_I:.*]] -; CHECK: [[BB1_I_I]]: -; CHECK-NEXT: [[LOAD_I_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I_I]], label %[[SPAM_EXIT_I:.*]], label %[[BB3_I_I_I:.*]] -; CHECK: [[BB3_I_I_I]]: -; CHECK-NEXT: [[LOAD_I_I_I_I:%.*]] = load volatile i32, ptr null, align 4 -; CHECK-NEXT: [[ICMP_I_I_I_I:%.*]] = icmp eq i32 [[LOAD_I_I_I_I]], 0 -; CHECK-NEXT: br i1 [[ICMP_I_I_I_I]], label %[[BB2_I_I_I_I:.*]], label %[[BB3_I_I_I_I:.*]] -; CHECK: [[BB2_I_I_I_I]]: -; CHECK-NEXT: br label %[[BB1_I:.*]] -; CHECK: [[BB1_I]]: -; CHECK-NEXT: [[LOAD_I_I_I6:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I_I6]], label %[[SPAM_EXIT_I8:.*]], label %[[BB3_I_I_I7:.*]] -; CHECK: [[BB3_I_I_I7]]: -; CHECK-NEXT: call void @zot.4() -; CHECK-NEXT: br label %[[SPAM_EXIT_I8]] -; CHECK: [[SPAM_EXIT_I8]]: -; CHECK-NEXT: [[ALLOCA_I_SROA_0_1:%.*]] = phi i64 [ 0, %[[BB3_I_I_I7]] ], [ 1, %[[BB1_I]] ] -; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[ALLOCA_I_SROA_0_1]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4 -; CHECK-NEXT: br label %[[BB1_I]] -; CHECK: [[EGGS_EXIT:.*]]: -; CHECK-NEXT: br label %[[SPAM_EXIT_I]] -; CHECK: [[BB3_I_I_I_I]]: -; CHECK-NEXT: [[LOAD_I_I4:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I4]], label %[[QUUX_EXIT:.*]], label %[[BB3_I_I5:.*]] -; CHECK: [[BB3_I_I5]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[QUUX_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: br label %[[SPAM_EXIT_I]] -; CHECK: [[SPAM_EXIT_I]]: -; CHECK-NEXT: [[ALLOCA_I_I_SROA_0_1:%.*]] = phi i64 [ 1, %[[BB1_I_I]] ], [ 0, %[[QUUX_EXIT]] ], [ 0, %[[EGGS_EXIT]] ] -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[ALLOCA_I_I_SROA_0_1]] to ptr -; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4 -; CHECK-NEXT: br label %[[BB1_I_I]] -; CHECK: [[BLAM_EXIT]]: 
-; CHECK-NEXT: [[LOAD_I_I:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD_I_I]], label %[[WOBBLE_2_EXIT:.*]], label %[[BB3_I_I:.*]] -; CHECK: [[BB3_I_I]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WOBBLE_2_EXIT]]: -; CHECK-NEXT: br label %[[BARNEY_EXIT]] -; CHECK: [[BARNEY_EXIT]]: -; CHECK-NEXT: store ptr poison, ptr null, align 8 -; CHECK-NEXT: ret void -; -bb: - %load.i = load volatile i1, ptr null, align 1 - br i1 %load.i, label %bb2.i, label %bb3.i - -bb2.i: ; preds = %bb - store i64 1, ptr null, align 8 - br label %barney.exit - -bb3.i: ; preds = %bb - %load.i1 = load volatile i32, ptr null, align 4 - %icmp.i = icmp eq i32 %load.i1, 0 - br i1 %icmp.i, label %bb2.i3, label %blam.exit - -bb2.i3: ; preds = %bb3.i - br label %bb1.i.i - -bb1.i.i: ; preds = %spam.exit.i, %bb2.i3 - %load.i.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i - -bb3.i.i.i: ; preds = %bb1.i.i - %load.i.i.i.i = load volatile i32, ptr null, align 4 - %icmp.i.i.i.i = icmp eq i32 %load.i.i.i.i, 0 - br i1 %icmp.i.i.i.i, label %bb2.i.i.i.i, label %bb3.i.i.i.i - -bb2.i.i.i.i: ; preds = %bb3.i.i.i - br label %bb1.i - -bb1.i: ; preds = %spam.exit.i8, %bb2.i.i.i.i - %load.i.i.i6 = load volatile i1, ptr null, align 1 - br i1 %load.i.i.i6, label %spam.exit.i8, label %bb3.i.i.i7 - -bb3.i.i.i7: ; preds = %bb1.i - call void @zot.4() - br label %spam.exit.i8 - -spam.exit.i8: ; preds = %bb1.i, %bb3.i.i.i7 - %alloca.i.sroa.0.1 = phi i64 [ 0, %bb3.i.i.i7 ], [ 1, %bb1.i ] - %0 = inttoptr i64 %alloca.i.sroa.0.1 to ptr - store i32 0, ptr %0, align 4 - br label %bb1.i - -eggs.exit: ; No predecessors! 
- br label %spam.exit.i - -bb3.i.i.i.i: ; preds = %bb3.i.i.i - %load.i.i4 = load volatile i1, ptr null, align 1 - br i1 %load.i.i4, label %quux.exit, label %bb3.i.i5 - -bb3.i.i5: ; preds = %bb3.i.i.i.i - call void @snork() - unreachable - -quux.exit: ; preds = %bb3.i.i.i.i - store ptr null, ptr null, align 8 - br label %spam.exit.i - -spam.exit.i: ; preds = %eggs.exit, %quux.exit, %bb1.i.i - %alloca.i.i.sroa.0.1 = phi i64 [ 1, %bb1.i.i ], [ 0, %quux.exit ], [ 0, %eggs.exit ] - %1 = inttoptr i64 %alloca.i.i.sroa.0.1 to ptr - store i32 0, ptr %1, align 4 - br label %bb1.i.i - -blam.exit: ; preds = %bb3.i - %load.i.i = load volatile i1, ptr null, align 1 - br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i - -bb3.i.i: ; preds = %blam.exit - call void @snork() - unreachable - -wobble.2.exit: ; preds = %blam.exit - store ptr null, ptr null, align 8 - br label %barney.exit - -barney.exit: ; preds = %bb2.i, %wobble.2.exit - ret void -} - -define void @wibble(ptr %arg) personality ptr null { -; CHECK-LABEL: define void @wibble( -; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { +define void @f2() { +; CHECK-LABEL: define void @f2() { ; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD:%.*]] = load volatile i1, ptr null, align 1 -; CHECK-NEXT: br i1 [[LOAD]], label %[[BB2:.*]], label %[[BB3:.*]] -; CHECK: [[BB1:.*]]: ; CHECK-NEXT: ret void -; CHECK: [[BB2]]: -; CHECK-NEXT: store ptr null, ptr [[ARG]], align 8 -; CHECK-NEXT: br label %[[BB1]] -; CHECK: [[BB3]]: -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WOMBAT_EXIT:.*:]] -; CHECK-NEXT: br label %[[BB1]] ; bb: - %load = load volatile i1, ptr null, align 1 - br i1 %load, label %bb2, label %bb3 - -bb1: ; preds = %wombat.exit, %bb2 - ret void - -bb2: ; preds = %bb %inttoptr = inttoptr i64 0 to ptr - store ptr %inttoptr, ptr %arg, align 8 - br label %bb1 - -bb3: ; preds = %bb - call void @snork() - unreachable - -wombat.exit: ; No predecessors! 
- br label %bb1 -} - -define i64 @zot.5(i64 %arg) { -; CHECK-LABEL: define i64 @zot.5( -; CHECK-SAME: i64 [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: ret i64 [[ARG]] -; -bb: - ret i64 %arg -} - -define i64 @foo() personality ptr null { -; CHECK-LABEL: define i64 @foo() personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: call void @snork() -; CHECK-NEXT: unreachable -; CHECK: [[WOMBAT_EXIT:.*:]] -; CHECK-NEXT: ret i64 0 -; -bb: - call void @snork() - unreachable - -wombat.exit: ; No predecessors! - ret i64 0 -} - -define ptr @ham(ptr %arg) { -; CHECK-LABEL: define ptr @ham( -; CHECK-SAME: ptr [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I_I:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: store ptr null, ptr [[ARG]], align 8 -; CHECK-NEXT: store i32 0, ptr [[LOAD_I_I]], align 4 -; CHECK-NEXT: ret ptr null -; -bb: - %load.i.i = load ptr, ptr %arg, align 8 - store ptr null, ptr %arg, align 8 - store i32 0, ptr %load.i.i, align 4 - ret ptr null -} - -define void @quux.6(ptr %arg) { -; CHECK-LABEL: define void @quux.6( -; CHECK-SAME: ptr [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: store ptr null, ptr [[ARG]], align 8 -; CHECK-NEXT: store i32 0, ptr [[LOAD]], align 4 -; CHECK-NEXT: ret void -; -bb: - %load = load ptr, ptr %arg, align 8 - store ptr null, ptr %arg, align 8 - store i32 0, ptr %load, align 4 ret void } - -define ptr @foo.7(ptr %arg) { -; CHECK-LABEL: define ptr @foo.7( -; CHECK-SAME: ptr [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: store ptr null, ptr [[ARG]], align 8 -; CHECK-NEXT: store i32 0, ptr [[LOAD_I]], align 4 -; CHECK-NEXT: ret ptr null -; -bb: - %load.i = load ptr, ptr %arg, align 8 - store ptr null, ptr %arg, align 8 - store i32 0, ptr %load.i, align 4 - ret ptr null -} - -define ptr @ham.8(ptr %arg) personality ptr null { -; CHECK-LABEL: define ptr 
@ham.8( -; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[LOAD_I_I_I:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: store ptr null, ptr [[ARG]], align 8 -; CHECK-NEXT: store i32 0, ptr [[LOAD_I_I_I]], align 4 -; CHECK-NEXT: ret ptr null -; -bb: - %load.i.i.i = load ptr, ptr %arg, align 8 - store ptr null, ptr %arg, align 8 - store i32 0, ptr %load.i.i.i, align 4 - ret ptr null -} - -define ptr @foo.9(ptr %arg) { -; CHECK-LABEL: define ptr @foo.9( -; CHECK-SAME: ptr [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: store i64 1, ptr [[ARG]], align 8 -; CHECK-NEXT: ret ptr null -; -bb: - store i64 1, ptr %arg, align 8 - ret ptr null -} - - -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)