From 4bbf1f460eb3fabdb7cf7cd731af0c227e6539c8 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 29 Sep 2023 20:10:07 +0200 Subject: [PATCH] Vendor import of llvm-project branch release/17.x llvmorg-17.0.1-25-g098e653a5bed. --- .../DependencyScanningFilesystem.h | 18 ++++- clang/lib/AST/ExprConstant.cpp | 17 ++-- clang/lib/CodeGen/CGCall.cpp | 24 ------ clang/lib/CodeGen/CGCoroutine.cpp | 33 -------- clang/lib/CodeGen/CodeGenFunction.h | 5 -- clang/lib/Frontend/TextDiagnostic.cpp | 3 +- .../Headers/cuda_wrappers/bits/basic_string.h | 9 +++ .../cuda_wrappers/bits/basic_string.tcc | 9 +++ clang/lib/Sema/SemaChecking.cpp | 6 +- .../DependencyScanningFilesystem.cpp | 79 +++++++++++++++---- libcxx/include/__config | 2 +- lld/COFF/Writer.cpp | 2 +- llvm/include/llvm/Transforms/Utils/Local.h | 10 +++ llvm/lib/Analysis/InlineCost.cpp | 20 ++--- llvm/lib/CodeGen/StackColoring.cpp | 62 ++++----------- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +- llvm/lib/Target/ARM/ARMInstrInfo.td | 2 +- llvm/lib/Target/ARM/ARMInstrThumb2.td | 2 +- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 28 +++++-- llvm/lib/Transforms/Scalar/GVN.cpp | 7 +- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 23 +++--- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 61 +++++++------- llvm/lib/Transforms/Utils/Local.cpp | 31 ++++++-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 12 ++- llvm/tools/lli/lli.cpp | 14 ++++ llvm/tools/llvm-readobj/COFFDumper.cpp | 30 ++++--- 26 files changed, 289 insertions(+), 224 deletions(-) create mode 100644 clang/lib/Headers/cuda_wrappers/bits/basic_string.h create mode 100644 clang/lib/Headers/cuda_wrappers/bits/basic_string.tcc diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 4b4e3c7eb2ec..dbe219b6dd8d 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ 
b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -215,6 +215,7 @@ class DependencyScanningFilesystemLocalCache { public: /// Returns entry associated with the filename or nullptr if none is found. const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { + assert(llvm::sys::path::is_absolute_gnu(Filename)); auto It = Cache.find(Filename); return It == Cache.end() ? nullptr : It->getValue(); } @@ -224,6 +225,7 @@ class DependencyScanningFilesystemLocalCache { const CachedFileSystemEntry & insertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry) { + assert(llvm::sys::path::is_absolute_gnu(Filename)); const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second; assert(InsertedEntry == &Entry && "entry already present"); return *InsertedEntry; @@ -282,13 +284,14 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { public: DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, - IntrusiveRefCntPtr FS) - : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {} + IntrusiveRefCntPtr FS); llvm::ErrorOr status(const Twine &Path) override; llvm::ErrorOr> openFileForRead(const Twine &Path) override; + std::error_code setCurrentWorkingDirectory(const Twine &Path) override; + /// Returns entry for the given filename. /// /// Attempts to use the local and shared caches first, then falls back to @@ -304,8 +307,11 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the /// shared cache indexed by unique ID, or creates new entry from scratch. + /// \p FilenameForLookup will always be an absolute path, and different than + /// \p OriginalFilename if \p OriginalFilename is relative. 
llvm::ErrorOr - computeAndStoreResult(StringRef Filename); + computeAndStoreResult(StringRef OriginalFilename, + StringRef FilenameForLookup); /// Scan for preprocessor directives for the given entry if necessary and /// returns a wrapper object with reference semantics. @@ -388,6 +394,12 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// The local cache is used by the worker thread to cache file system queries /// locally instead of querying the global cache every time. DependencyScanningFilesystemLocalCache LocalCache; + + /// The working directory to use for making relative paths absolute before + /// using them for cache lookups. + llvm::ErrorOr WorkingDirForCacheLookup; + + void updateWorkingDirForCacheLookup(); }; } // end namespace dependencies diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index f1bad0c7f7f2..c62044f36194 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -6013,8 +6013,9 @@ const AccessKinds StartLifetimeOfUnionMemberHandler::AccessKind; /// operator whose left-hand side might involve a union member access. If it /// does, implicitly start the lifetime of any accessed union elements per /// C++20 [class.union]5. -static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr, - const LValue &LHS) { +static bool MaybeHandleUnionActiveMemberChange(EvalInfo &Info, + const Expr *LHSExpr, + const LValue &LHS) { if (LHS.InvalidBase || LHS.Designator.Invalid) return false; @@ -6069,8 +6070,14 @@ static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr, break; // Walk path backwards as we walk up from the base to the derived class. for (const CXXBaseSpecifier *Elt : llvm::reverse(ICE->path())) { + if (Elt->isVirtual()) { + // A class with virtual base classes never has a trivial default + // constructor, so S(E) is empty in this case. 
+ E = nullptr; + break; + } + --PathLength; - (void)Elt; assert(declaresSameEntity(Elt->getType()->getAsCXXRecordDecl(), LHS.Designator.Entries[PathLength] .getAsBaseOrMember().getPointer())); @@ -7748,7 +7755,7 @@ class ExprEvaluatorBase // per C++20 [class.union]5. if (Info.getLangOpts().CPlusPlus20 && OCE && OCE->getOperator() == OO_Equal && MD->isTrivial() && - !HandleUnionActiveMemberChange(Info, Args[0], ThisVal)) + !MaybeHandleUnionActiveMemberChange(Info, Args[0], ThisVal)) return false; Args = Args.slice(1); @@ -8621,7 +8628,7 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { return false; if (Info.getLangOpts().CPlusPlus20 && - !HandleUnionActiveMemberChange(Info, E->getLHS(), Result)) + !MaybeHandleUnionActiveMemberChange(Info, E->getLHS(), Result)) return false; return handleAssignment(this->Info, E, Result, E->getLHS()->getType(), diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 0d1e9ad439b7..6b8af9bf18c1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5487,30 +5487,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); } - // The await_suspend call performed by co_await is essentially asynchronous - // to the execution of the coroutine. Inlining it normally into an unsplit - // coroutine can cause miscompilation because the coroutine CFG misrepresents - // the true control flow of the program: things that happen in the - // await_suspend are not guaranteed to happen prior to the resumption of the - // coroutine, and things that happen after the resumption of the coroutine - // (including its exit and the potential deallocation of the coroutine frame) - // are not guaranteed to happen only after the end of await_suspend. - // - // The short-term solution to this problem is to mark the call as uninlinable. 
- // But we don't want to do this if the call is known to be trivial, which is - // very common. - // - // The long-term solution may introduce patterns like: - // - // call @llvm.coro.await_suspend(ptr %awaiter, ptr %handle, - // ptr @awaitSuspendFn) - // - // Then it is much easier to perform the safety analysis in the middle end. - // If it is safe to inline the call to awaitSuspend, we can replace it in the - // CoroEarly pass. Otherwise we could replace it in the CoroSplit pass. - if (inSuspendBlock() && mayCoroHandleEscape()) - Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); - // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 810ae7d51ec1..8437cda79beb 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -139,36 +139,6 @@ static bool memberCallExpressionCanThrow(const Expr *E) { return true; } -/// Return true when the coroutine handle may escape from the await-suspend -/// (`awaiter.await_suspend(std::coroutine_handle)` expression). -/// Return false only when the coroutine wouldn't escape in the await-suspend -/// for sure. -/// -/// While it is always safe to return true, return falses can bring better -/// performances. -/// -/// See https://github.com/llvm/llvm-project/issues/56301 and -/// https://reviews.llvm.org/D157070 for the example and the full discussion. -/// -/// FIXME: It will be much better to perform such analysis in the middle end. -/// See the comments in `CodeGenFunction::EmitCall` for example. -static bool MayCoroHandleEscape(CoroutineSuspendExpr const &S) { - CXXRecordDecl *Awaiter = - S.getCommonExpr()->getType().getNonReferenceType()->getAsCXXRecordDecl(); - - // Return true conservatively if the awaiter type is not a record type. 
- if (!Awaiter) - return true; - - // In case the awaiter type is empty, the suspend wouldn't leak the coroutine - // handle. - // - // TODO: We can improve this by looking into the implementation of - // await-suspend and see if the coroutine handle is passed to foreign - // functions. - return !Awaiter->field_empty(); -} - // Emit suspend expression which roughly looks like: // // auto && x = CommonExpr(); @@ -229,11 +199,8 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); CGF.CurCoro.InSuspendBlock = true; - CGF.CurCoro.MayCoroHandleEscape = MayCoroHandleEscape(S); auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); CGF.CurCoro.InSuspendBlock = false; - CGF.CurCoro.MayCoroHandleEscape = false; - if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { // Veto suspension if requested by bool returning await_suspend. BasicBlock *RealSuspendBlock = diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 28ec2b970072..8722fd4550e4 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -334,7 +334,6 @@ class CodeGenFunction : public CodeGenTypeCache { struct CGCoroInfo { std::unique_ptr Data; bool InSuspendBlock = false; - bool MayCoroHandleEscape = false; CGCoroInfo(); ~CGCoroInfo(); }; @@ -348,10 +347,6 @@ class CodeGenFunction : public CodeGenTypeCache { return isCoroutine() && CurCoro.InSuspendBlock; } - bool mayCoroHandleEscape() const { - return isCoroutine() && CurCoro.MayCoroHandleEscape; - } - /// CurGD - The GlobalDecl for the current function being compiled. 
GlobalDecl CurGD; diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 3a3cc246d3af..1b58261b22a2 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -1160,8 +1160,7 @@ void TextDiagnostic::emitSnippetAndCaret( // Find the set of lines to include. const unsigned MaxLines = DiagOpts->SnippetLineLimit; std::pair Lines = {CaretLineNo, CaretLineNo}; - unsigned DisplayLineNo = - Ranges.empty() ? Loc.getPresumedLoc().getLine() : ~0u; + unsigned DisplayLineNo = Loc.getPresumedLoc().getLine(); for (const auto &I : Ranges) { if (auto OptionalRange = findLinesForRange(I, FID, SM)) Lines = maybeAddRange(Lines, *OptionalRange, MaxLines); diff --git a/clang/lib/Headers/cuda_wrappers/bits/basic_string.h b/clang/lib/Headers/cuda_wrappers/bits/basic_string.h new file mode 100644 index 000000000000..64f50d9f6a72 --- /dev/null +++ b/clang/lib/Headers/cuda_wrappers/bits/basic_string.h @@ -0,0 +1,9 @@ +// CUDA headers define __noinline__ which interferes with libstdc++'s use of +// `__attribute((__noinline__))`. In order to avoid compilation error, +// temporarily unset __noinline__ when we include affected libstdc++ header. + +#pragma push_macro("__noinline__") +#undef __noinline__ +#include_next "bits/basic_string.h" + +#pragma pop_macro("__noinline__") diff --git a/clang/lib/Headers/cuda_wrappers/bits/basic_string.tcc b/clang/lib/Headers/cuda_wrappers/bits/basic_string.tcc new file mode 100644 index 000000000000..90c7fe34d932 --- /dev/null +++ b/clang/lib/Headers/cuda_wrappers/bits/basic_string.tcc @@ -0,0 +1,9 @@ +// CUDA headers define __noinline__ which interferes with libstdc++'s use of +// `__attribute((__noinline__))`. In order to avoid compilation error, +// temporarily unset __noinline__ when we include affected libstdc++ header. 
+ +#pragma push_macro("__noinline__") +#undef __noinline__ +#include_next "bits/basic_string.tcc" + +#pragma pop_macro("__noinline__") diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f8e48728da66..8626fc6ea16f 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -11308,7 +11308,11 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, Hints.push_back( FixItHint::CreateInsertion(E->getBeginLoc(), CastFix.str())); - SourceLocation After = S.getLocForEndOfToken(E->getEndLoc()); + // We don't use getLocForEndOfToken because it returns invalid source + // locations for macro expansions (by design). + SourceLocation EndLoc = S.SourceMgr.getSpellingLoc(E->getEndLoc()); + SourceLocation After = EndLoc.getLocWithOffset( + Lexer::MeasureTokenLength(EndLoc, S.SourceMgr, S.LangOpts)); Hints.push_back(FixItHint::CreateInsertion(After, ")")); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 31404855e3b1..3e53c8fc5740 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -96,6 +96,7 @@ DependencyScanningFilesystemSharedCache:: DependencyScanningFilesystemSharedCache::CacheShard & DependencyScanningFilesystemSharedCache::getShardForFilename( StringRef Filename) const { + assert(llvm::sys::path::is_absolute_gnu(Filename)); return CacheShards[llvm::hash_value(Filename) % NumShards]; } @@ -109,6 +110,7 @@ DependencyScanningFilesystemSharedCache::getShardForUID( const CachedFileSystemEntry * DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( StringRef Filename) const { + assert(llvm::sys::path::is_absolute_gnu(Filename)); std::lock_guard LockGuard(CacheLock); auto It = EntriesByFilename.find(Filename); return It == EntriesByFilename.end() ? 
nullptr : It->getValue(); @@ -189,6 +191,14 @@ static bool shouldCacheStatFailures(StringRef Filename) { return shouldScanForDirectivesBasedOnExtension(Filename); } +DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( + DependencyScanningFilesystemSharedCache &SharedCache, + IntrusiveRefCntPtr FS) + : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), + WorkingDirForCacheLookup(llvm::errc::invalid_argument) { + updateWorkingDirForCacheLookup(); +} + bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( StringRef Filename) { return shouldScanForDirectivesBasedOnExtension(Filename); @@ -215,44 +225,62 @@ DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( } llvm::ErrorOr -DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { - llvm::ErrorOr Stat = getUnderlyingFS().status(Filename); +DependencyScanningWorkerFilesystem::computeAndStoreResult( + StringRef OriginalFilename, StringRef FilenameForLookup) { + llvm::ErrorOr Stat = + getUnderlyingFS().status(OriginalFilename); if (!Stat) { - if (!shouldCacheStatFailures(Filename)) + if (!shouldCacheStatFailures(OriginalFilename)) return Stat.getError(); const auto &Entry = - getOrEmplaceSharedEntryForFilename(Filename, Stat.getError()); - return insertLocalEntryForFilename(Filename, Entry); + getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); + return insertLocalEntryForFilename(FilenameForLookup, Entry); } if (const auto *Entry = findSharedEntryByUID(*Stat)) - return insertLocalEntryForFilename(Filename, *Entry); + return insertLocalEntryForFilename(FilenameForLookup, *Entry); auto TEntry = - Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename); + Stat->isDirectory() ? 
TentativeEntry(*Stat) : readFile(OriginalFilename); const CachedFileSystemEntry *SharedEntry = [&]() { if (TEntry) { const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); - return &getOrInsertSharedEntryForFilename(Filename, UIDEntry); + return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); } - return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError()); + return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, + TEntry.getError()); }(); - return insertLocalEntryForFilename(Filename, *SharedEntry); + return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); } llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( - StringRef Filename, bool DisableDirectivesScanning) { - if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) - return scanForDirectivesIfNecessary(*Entry, Filename, + StringRef OriginalFilename, bool DisableDirectivesScanning) { + StringRef FilenameForLookup; + SmallString<256> PathBuf; + if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) { + FilenameForLookup = OriginalFilename; + } else if (!WorkingDirForCacheLookup) { + return WorkingDirForCacheLookup.getError(); + } else { + StringRef RelFilename = OriginalFilename; + RelFilename.consume_front("./"); + PathBuf = *WorkingDirForCacheLookup; + llvm::sys::path::append(PathBuf, RelFilename); + FilenameForLookup = PathBuf.str(); + } + assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup)); + if (const auto *Entry = + findEntryByFilenameWithWriteThrough(FilenameForLookup)) + return scanForDirectivesIfNecessary(*Entry, OriginalFilename, DisableDirectivesScanning) .unwrapError(); - auto MaybeEntry = computeAndStoreResult(Filename); + auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup); if (!MaybeEntry) return MaybeEntry.getError(); - return scanForDirectivesIfNecessary(*MaybeEntry, Filename, + return scanForDirectivesIfNecessary(*MaybeEntry, OriginalFilename, 
DisableDirectivesScanning) .unwrapError(); } @@ -330,3 +358,24 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { return Result.getError(); return DepScanFile::create(Result.get()); } + +std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( + const Twine &Path) { + std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path); + updateWorkingDirForCacheLookup(); + return EC; +} + +void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { + llvm::ErrorOr CWD = + getUnderlyingFS().getCurrentWorkingDirectory(); + if (!CWD) { + WorkingDirForCacheLookup = CWD.getError(); + } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) { + WorkingDirForCacheLookup = llvm::errc::invalid_argument; + } else { + WorkingDirForCacheLookup = *CWD; + } + assert(!WorkingDirForCacheLookup || + llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); +} diff --git a/libcxx/include/__config b/libcxx/include/__config index 43f8a20031ff..c37d643414aa 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -40,7 +40,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 170000 +# define _LIBCPP_VERSION 170002 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 85568a12531f..c9b6318fe521 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1796,7 +1796,7 @@ void Writer::createGuardCFTables() { // Add the ehcont target table unless the user told us not to. 
if (config->guardCF & GuardCFLevel::EHCont) maybeAddRVATable(std::move(ehContTargets), "__guard_eh_cont_table", - "__guard_eh_cont_count", true); + "__guard_eh_cont_count"); // Set __guard_flags, which will be used in the load config to indicate that // /guard:cf was enabled. diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 4578af069814..d23db1574e9d 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -162,8 +162,18 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, /// Check for and eliminate duplicate PHI nodes in this block. This doesn't try /// to be clever about PHI nodes which differ only in the order of the incoming /// values, but instcombine orders them so it usually won't matter. +/// +/// This overload removes the duplicate PHI nodes directly. bool EliminateDuplicatePHINodes(BasicBlock *BB); +/// Check for and eliminate duplicate PHI nodes in this block. This doesn't try +/// to be clever about PHI nodes which differ only in the order of the incoming +/// values, but instcombine orders them so it usually won't matter. +/// +/// This overload collects the PHI nodes to be removed into the ToRemove set. +bool EliminateDuplicatePHINodes(BasicBlock *BB, + SmallPtrSetImpl &ToRemove); + /// This function is used to do simplification of a CFG. For example, it /// adjusts branches to branches to eliminate the extra hop, it eliminates /// unreachable basic blocks, and does other peephole optimization of the CFG. diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 9ff277f5334e..a2f46edcf5ef 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -2806,14 +2806,16 @@ LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); } /// Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. 
static bool functionsHaveCompatibleAttributes( - Function *Caller, Function *Callee, + Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref &GetTLI) { // Note that CalleeTLI must be a copy not a reference. The legacy pass manager // caches the most recently created TLI in the TargetLibraryInfoWrapperPass // object, and always returns the same object (which is overwritten on each // GetTLI call). Therefore we copy the first result. auto CalleeTLI = GetTLI(*Callee); - return GetTLI(*Caller).areInlineCompatible(CalleeTLI, + return (IgnoreTTIInlineCompatible || + TTI.areInlineCompatible(Caller, Callee)) && + GetTLI(*Caller).areInlineCompatible(CalleeTLI, InlineCallerSupersetNoBuiltin) && AttributeFuncs::areInlineCompatible(*Caller, *Callee); } @@ -2929,12 +2931,6 @@ std::optional llvm::getAttributeBasedInliningDecision( " address space"); } - // Never inline functions with conflicting target attributes. - Function *Caller = Call.getCaller(); - if (!IgnoreTTIInlineCompatible && - !CalleeTTI.areInlineCompatible(Caller, Callee)) - return InlineResult::failure("conflicting target attributes"); - // Calls to functions with always-inline attributes should be inlined // whenever possible. if (Call.hasFnAttr(Attribute::AlwaysInline)) { @@ -2949,12 +2945,8 @@ std::optional llvm::getAttributeBasedInliningDecision( // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). - // FIXME: functionsHaveCompatibleAttributes below checks for compatibilities - // of different kinds of function attributes -- sanitizer-related ones, - // checkDenormMode, no-builtin-memcpy, etc. It's unclear if we really want - // the always-inline attribute to take precedence over these different types - // of function attributes. 
- if (!functionsHaveCompatibleAttributes(Caller, Callee, GetTLI)) + Function *Caller = Call.getCaller(); + if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI)) return InlineResult::failure("conflicting attributes"); // Don't inline this call if the caller has the optnone attribute. diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 66b9086e1d88..3453b718b453 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -370,37 +370,6 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // If in RPO ordering chosen to walk the CFG we happen to visit the b[k] // before visiting the memcpy block (which will contain the lifetime start // for "b" then it will appear that 'b' has a degenerate lifetime. -// -// Handle Windows Exception with LifetimeStartOnFirstUse: -// ----------------- -// -// There was a bug for using LifetimeStartOnFirstUse in win32. -// class Type1 { -// ... -// ~Type1(){ write memory;} -// } -// ... -// try{ -// Type1 V -// ... -// } catch (Type2 X){ -// ... -// } -// For variable X in catch(X), we put point pX=&(&X) into ConservativeSlots -// to prevent using LifetimeStartOnFirstUse. Because pX may merged with -// object V which may call destructor after implicitly writing pX. All these -// are done in C++ EH runtime libs (through CxxThrowException), and can't -// obviously check it in IR level. -// -// The loader of pX, without obvious writing IR, is usually the first LOAD MI -// in EHPad, Some like: -// bb.x.catch.i (landing-pad, ehfunclet-entry): -// ; predecessors: %bb... -// successors: %bb... -// %n:gr32 = MOV32rm %stack.pX ... -// ... -// The Type2** %stack.pX will only be written in EH runtime libs, so we -// check the StoreSlots to screen it out. namespace { @@ -462,9 +431,6 @@ class StackColoring : public MachineFunctionPass { /// slots lifetime-start-on-first-use is disabled). 
BitVector ConservativeSlots; - /// Record the FI slots referenced by a 'may write to memory'. - BitVector StoreSlots; - /// Number of iterations taken during data flow analysis. unsigned NumIterations; @@ -660,13 +626,10 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { InterestingSlots.resize(NumSlot); ConservativeSlots.clear(); ConservativeSlots.resize(NumSlot); - StoreSlots.clear(); - StoreSlots.resize(NumSlot); // number of start and end lifetime ops for each slot SmallVector NumStartLifetimes(NumSlot, 0); SmallVector NumEndLifetimes(NumSlot, 0); - SmallVector NumLoadInCatchPad(NumSlot, 0); // Step 1: collect markers and populate the "InterestingSlots" // and "ConservativeSlots" sets. @@ -722,13 +685,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { if (! BetweenStartEnd.test(Slot)) { ConservativeSlots.set(Slot); } - // Here we check the StoreSlots to screen catch point out. For more - // information, please refer "Handle Windows Exception with - // LifetimeStartOnFirstUse" at the head of this file. - if (MI.mayStore()) - StoreSlots.set(Slot); - if (MF->getWinEHFuncInfo() && MBB->isEHPad() && MI.mayLoad()) - NumLoadInCatchPad[Slot] += 1; } } } @@ -739,14 +695,24 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { return 0; } - // 1) PR27903: slots with multiple start or end lifetime ops are not + // PR27903: slots with multiple start or end lifetime ops are not // safe to enable for "lifetime-start-on-first-use". - // 2) And also not safe for variable X in catch(X) in windows. 
for (unsigned slot = 0; slot < NumSlot; ++slot) { - if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 || - (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot))) + if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1) ConservativeSlots.set(slot); } + + // The write to the catch object by the personality function is not properly + // modeled in IR: It happens before any cleanuppads are executed, even if the + // first mention of the catch object is in a catchpad. As such, mark catch + // object slots as conservative, so they are excluded from first-use analysis. + if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo()) + for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap) + for (WinEHHandlerType &H : TBME.HandlerArray) + if (H.CatchObj.FrameIndex != std::numeric_limits::max() && + H.CatchObj.FrameIndex >= 0) + ConservativeSlots.set(H.CatchObj.FrameIndex); + LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots)); // Step 2: compute begin/end sets for each block diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c7a6dd7deb45..a1753a40a117 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7388,7 +7388,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Ensure we generate all stores for each tuple part, whilst updating the // pointer after each store correctly using vscale. 
while (NumParts) { - Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI); + SDValue Store = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI); + MemOpChains.push_back(Store); + NumParts--; if (NumParts > 0) { SDValue BytesIncrement; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 471b706cc408..fde386188cd8 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -4854,7 +4854,7 @@ class AI_crc32 sz, string suffix, SDPatternOperator builtin> : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary, !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm", [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>, - Requires<[IsARM, HasV8, HasCRC]> { + Requires<[IsARM, HasCRC]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 610a71d68ec8..f68f73523ba1 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3448,7 +3448,7 @@ class T2I_crc32 sz, string suffix, SDPatternOperator builtin> : T2ThreeRegNoP<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary, !strconcat("crc32", suffix, "\t$Rd, $Rn, $Rm"), [(set rGPR:$Rd, (builtin rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2, HasV8, HasCRC]> { + Requires<[IsThumb2, HasCRC]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b010110; let Inst{20} = C; diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index a089d61616e1..5c10d6307c76 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -279,7 +279,7 @@ class PPCAIXAsmPrinter : public PPCAsmPrinter { void emitFunctionBodyEnd() override; - void emitPGORefs(); + void emitPGORefs(Module &M); void emitEndOfAsmFile(Module &) override; @@ -2636,10 +2636,28 @@ void PPCAIXAsmPrinter::emitFunctionEntryLabel() { getObjFileLowering().getFunctionEntryPointSymbol(Alias, 
TM)); } -void PPCAIXAsmPrinter::emitPGORefs() { - if (OutContext.hasXCOFFSection( +void PPCAIXAsmPrinter::emitPGORefs(Module &M) { + if (!OutContext.hasXCOFFSection( "__llvm_prf_cnts", - XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) { + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) + return; + + // When inside a csect `foo`, a .ref directive referring to a csect `bar` + // translates into a relocation entry from `foo` to `bar`. The referring + // csect, `foo`, is identified by its address. If multiple csects have the + // same address (because one or more of them are zero-length), the referring + // csect cannot be determined. Hence, we don't generate the .ref directives + // if `__llvm_prf_cnts` is an empty section. + bool HasNonZeroLengthPrfCntsSection = false; + const DataLayout &DL = M.getDataLayout(); + for (GlobalVariable &GV : M.globals()) + if (GV.hasSection() && GV.getSection().equals("__llvm_prf_cnts") && + DL.getTypeAllocSize(GV.getValueType()) > 0) { + HasNonZeroLengthPrfCntsSection = true; + break; + } + + if (HasNonZeroLengthPrfCntsSection) { MCSection *CntsSection = OutContext.getXCOFFSection( "__llvm_prf_cnts", SectionKind::getData(), XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD), @@ -2673,7 +2691,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { if (M.empty() && TOCDataGlobalVars.empty()) return; - emitPGORefs(); + emitPGORefs(M); // Switch to section to emit TOC base. OutStreamer->switchSection(getObjFileLowering().getTOCBaseSection()); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 03e8a2507b45..4cfc0bacefbc 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2778,7 +2778,12 @@ bool GVNPass::processBlock(BasicBlock *BB) { // use our normal hash approach for phis. Instead, simply look for // obvious duplicates. The first pass of GVN will tend to create // identical phis, and the second or later passes can eliminate them. 
- ChangedFunction |= EliminateDuplicatePHINodes(BB); + SmallPtrSet PHINodesToRemove; + ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove); + for (PHINode *PN : PHINodesToRemove) { + VN.erase(PN); + removeInstruction(PN); + } for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 00937e0d734a..91dc5018d232 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -332,6 +332,17 @@ static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, return !MSSA->dominates(Clobber, Start); } +// Update AA metadata +static void combineAAMetadata(Instruction *ReplInst, Instruction *I) { + // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be + // handled here, but combineMetadata doesn't support them yet + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_invariant_group, + LLVMContext::MD_access_group}; + combineMetadata(ReplInst, I, KnownIDs, true); +} + /// When scanning forward over instructions, we look for some other patterns to /// fold away. In particular, this looks for stores to neighboring locations of /// memory. 
If it sees enough consecutive ones, it attempts to merge them @@ -1086,16 +1097,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, MSSA->getMemoryAccess(C)); } - // Update AA metadata - // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be - // handled here, but combineMetadata doesn't support them yet - unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_invariant_group, - LLVMContext::MD_access_group}; - combineMetadata(C, cpyLoad, KnownIDs, true); + combineAAMetadata(C, cpyLoad); if (cpyLoad != cpyStore) - combineMetadata(C, cpyStore, KnownIDs, true); + combineAAMetadata(C, cpyStore); ++NumCallSlot; return true; @@ -1694,6 +1698,7 @@ bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) { << " " << CB << "\n"); // Otherwise we're good! Update the immut argument. + combineAAMetadata(&CB, MDep); CB.setArgOperand(ArgNo, MDep->getSource()); ++NumMemCpyInstr; return true; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index ad7d34b61470..633d077e6492 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2126,9 +2126,10 @@ static void unswitchNontrivialInvariants( Loop &L, Instruction &TI, ArrayRef Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, - function_ref)> UnswitchCB, + function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - function_ref DestroyLoopCB, bool InsertFreeze) { + function_ref DestroyLoopCB, bool InsertFreeze, + bool InjectedCondition) { auto *ParentBB = TI.getParent(); BranchInst *BI = dyn_cast(&TI); SwitchInst *SI = BI ? 
nullptr : cast(&TI); @@ -2581,7 +2582,7 @@ static void unswitchNontrivialInvariants( for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops)) if (UpdatedL->getParentLoop() == ParentL) SibLoops.push_back(UpdatedL); - UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops); + UnswitchCB(IsStillLoop, PartiallyInvariant, InjectedCondition, SibLoops); if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -2979,13 +2980,6 @@ static bool shouldTryInjectInvariantCondition( /// the metadata. bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc) { - // Skip branches that have already been unswithed this way. After successful - // unswitching of injected condition, we will still have a copy of this loop - // which looks exactly the same as original one. To prevent the 2nd attempt - // of unswitching it in the same pass, mark this branch as "nothing to do - // here". - if (BI->hasMetadata("llvm.invariant.condition.injection.disabled")) - return false; SmallVector Weights; if (!extractBranchWeights(*BI, Weights)) return false; @@ -3060,7 +3054,6 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, auto *InjectedCond = ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond", Preheader->getTerminator()); - auto *OldCond = TI->getCondition(); BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check", BB->getParent(), InLoopSucc); @@ -3069,12 +3062,9 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock); Builder.SetInsertPoint(CheckBlock); - auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc); - + Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0), + TI->getSuccessor(1)); TI->eraseFromParent(); - // Prevent infinite unswitching. 
- NewTerm->setMetadata("llvm.invariant.condition.injection.disabled", - MDNode::get(BB->getContext(), {})); // Fixup phis. for (auto &I : *InLoopSucc) { @@ -3442,7 +3432,7 @@ static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, static bool unswitchBestCondition( Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, - function_ref)> UnswitchCB, + function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref DestroyLoopCB) { // Collect all invariant conditions within this loop (as opposed to an inner @@ -3452,9 +3442,10 @@ static bool unswitchBestCondition( Instruction *PartialIVCondBranch = nullptr; collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo, PartialIVCondBranch, L, LI, AA, MSSAU); - collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo, - PartialIVCondBranch, L, DT, LI, AA, - MSSAU); + if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable")) + collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo, + PartialIVCondBranch, L, DT, LI, AA, + MSSAU); // If we didn't find any candidates, we're done. 
if (UnswitchCandidates.empty()) return false; @@ -3475,8 +3466,11 @@ static bool unswitchBestCondition( return false; } - if (Best.hasPendingInjection()) + bool InjectedCondition = false; + if (Best.hasPendingInjection()) { Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU); + InjectedCondition = true; + } assert(!Best.hasPendingInjection() && "All injections should have been done by now!"); @@ -3504,7 +3498,7 @@ static bool unswitchBestCondition( << ") terminator: " << *Best.TI << "\n"); unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT, LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB, - InsertFreeze); + InsertFreeze, InjectedCondition); return true; } @@ -3533,7 +3527,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, - function_ref)> UnswitchCB, + function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref DestroyLoopCB) { @@ -3548,7 +3542,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) { // If we unswitched successfully we will want to clean up the loop before // processing it further so just mark it as unswitched and return. - UnswitchCB(/*CurrentLoopValid*/ true, false, {}); + UnswitchCB(/*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false, + /*InjectedCondition*/ false, {}); return true; } @@ -3644,6 +3639,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, bool PartiallyInvariant, + bool InjectedCondition, ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. 
if (!NewLoops.empty()) @@ -3663,6 +3659,16 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, Context, L.getLoopID(), {"llvm.loop.unswitch.partial"}, {DisableUnswitchMD}); L.setLoopID(NewLoopID); + } else if (InjectedCondition) { + // Do the same for injection of invariant conditions. + auto &Context = L.getHeader()->getContext(); + MDNode *DisableUnswitchMD = MDNode::get( + Context, + MDString::get(Context, "llvm.loop.unswitch.injection.disable")); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, L.getLoopID(), {"llvm.loop.unswitch.injection"}, + {DisableUnswitchMD}); + L.setLoopID(NewLoopID); } else U.revisitCurrentLoop(); } else @@ -3755,6 +3761,7 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { auto *SE = SEWP ? &SEWP->getSE() : nullptr; auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant, + bool InjectedCondition, ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. for (auto *NewL : NewLoops) @@ -3765,9 +3772,9 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { // but it is the best we can do in the old PM. if (CurrentLoopValid) { // If the current loop has been unswitched using a partially invariant - // condition, we should not re-add the current loop to avoid unswitching - // on the same condition again. - if (!PartiallyInvariant) + // condition or injected invariant condition, we should not re-add the + // current loop to avoid unswitching on the same condition again. 
+ if (!PartiallyInvariant && !InjectedCondition) LPM.addLoop(*L); } else LPM.markLoopAsDeleted(*L); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index f153ace5d3fc..eeb0446c1197 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1247,7 +1247,9 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, return true; } -static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { +static bool +EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB, + SmallPtrSetImpl &ToRemove) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. @@ -1263,12 +1265,14 @@ static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { // Note that we only look in the upper square's triangle, // we already checked that the lower triangle PHI's aren't identical. for (auto J = I; PHINode *DuplicatePN = dyn_cast(J); ++J) { + if (ToRemove.contains(DuplicatePN)) + continue; if (!DuplicatePN->isIdenticalToWhenDefined(PN)) continue; // A duplicate. Replace this PHI with the base PHI. ++NumPHICSEs; DuplicatePN->replaceAllUsesWith(PN); - DuplicatePN->eraseFromParent(); + ToRemove.insert(DuplicatePN); Changed = true; // The RAUW can change PHIs that we already visited. @@ -1279,7 +1283,9 @@ static bool EliminateDuplicatePHINodesNaiveImpl(BasicBlock *BB) { return Changed; } -static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { +static bool +EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB, + SmallPtrSetImpl &ToRemove) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for // one having an undef where the other doesn't could be collapsed. @@ -1343,12 +1349,14 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { // Examine each PHI. 
bool Changed = false; for (auto I = BB->begin(); PHINode *PN = dyn_cast(I++);) { + if (ToRemove.contains(PN)) + continue; auto Inserted = PHISet.insert(PN); if (!Inserted.second) { // A duplicate. Replace this PHI with its duplicate. ++NumPHICSEs; PN->replaceAllUsesWith(*Inserted.first); - PN->eraseFromParent(); + ToRemove.insert(PN); Changed = true; // The RAUW can change PHIs that we already visited. Start over from the @@ -1361,14 +1369,23 @@ static bool EliminateDuplicatePHINodesSetBasedImpl(BasicBlock *BB) { return Changed; } -bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB, + SmallPtrSetImpl &ToRemove) { if ( #ifndef NDEBUG !PHICSEDebugHash && #endif hasNItemsOrLess(BB->phis(), PHICSENumPHISmallSize)) - return EliminateDuplicatePHINodesNaiveImpl(BB); - return EliminateDuplicatePHINodesSetBasedImpl(BB); + return EliminateDuplicatePHINodesNaiveImpl(BB, ToRemove); + return EliminateDuplicatePHINodesSetBasedImpl(BB, ToRemove); +} + +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + SmallPtrSet ToRemove; + bool Changed = EliminateDuplicatePHINodes(BB, ToRemove); + for (PHINode *PN : ToRemove) + PN->eraseFromParent(); + return Changed; } /// If the specified pointer points to an object that we control, try to modify diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index d3a9a41aef15..bd7ab7c98781 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6051,8 +6051,9 @@ SwitchLookupTable::SwitchLookupTable( bool LinearMappingPossible = true; APInt PrevVal; APInt DistToPrev; - // When linear map is monotonic, we can attach nsw. - bool Wrapped = false; + // When linear map is monotonic and signed overflow doesn't happen on + // maximum index, we can attach nsw on Add and Mul. 
+ bool NonMonotonic = false; assert(TableSize >= 2 && "Should be a SingleValue table."); // Check if there is the same distance between two consecutive values. for (uint64_t I = 0; I < TableSize; ++I) { @@ -6072,7 +6073,7 @@ SwitchLookupTable::SwitchLookupTable( LinearMappingPossible = false; break; } - Wrapped |= + NonMonotonic |= Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal); } PrevVal = Val; @@ -6080,7 +6081,10 @@ SwitchLookupTable::SwitchLookupTable( if (LinearMappingPossible) { LinearOffset = cast(TableContents[0]); LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev); - LinearMapValWrapped = Wrapped; + bool MayWrap = false; + APInt M = LinearMultiplier->getValue(); + (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap); + LinearMapValWrapped = NonMonotonic || MayWrap; Kind = LinearMapKind; ++NumLinearMaps; return; diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index 3b5250d56707..e2fff69dae20 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -1200,18 +1200,32 @@ Expected> launchRemote() { // For real JIT uses, the real compiler support libraries should be linked // in, somehow; this is a workaround to let tests pass. // +// We need to make sure that this symbol actually is linked in when we +// try to export it; if no functions allocate a large enough stack area, +// nothing would reference it. Therefore, manually declare it and add a +// reference to it. (Note, the declarations of _alloca/___chkstk_ms/__chkstk +// are somewhat bogus, these functions use a different custom calling +// convention.) +// // TODO: Move this into libORC at some point, see // https://github.com/llvm/llvm-project/issues/56603. #ifdef __MINGW32__ // This is a MinGW version of #pragma comment(linker, "...") that doesn't // require compiling with -fms-extensions. 
#if defined(__i386__) +#undef _alloca +extern "C" void _alloca(void); +static __attribute__((used)) void (*const ref_func)(void) = _alloca; static __attribute__((section(".drectve"), used)) const char export_chkstk[] = "-export:_alloca"; #elif defined(__x86_64__) +extern "C" void ___chkstk_ms(void); +static __attribute__((used)) void (*const ref_func)(void) = ___chkstk_ms; static __attribute__((section(".drectve"), used)) const char export_chkstk[] = "-export:___chkstk_ms"; #else +extern "C" void __chkstk(void); +static __attribute__((used)) void (*const ref_func)(void) = __chkstk; static __attribute__((section(".drectve"), used)) const char export_chkstk[] = "-export:__chkstk"; #endif diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index 0a5073d2d23f..79fef8bc123c 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -940,36 +940,34 @@ void COFFDumper::printCOFFLoadConfig() { OS << " flags " << utohexstr(Flags); }; + // The stride gives the number of extra bytes in addition to the 4-byte + // RVA of each entry in the table. As of writing only a 1-byte extra flag + // has been defined. + uint32_t Stride = Tables.GuardFlags >> 28; + PrintExtraCB PrintExtra = Stride == 1 ? +PrintGuardFlags : nullptr; + if (Tables.GuardFidTableVA) { ListScope LS(W, "GuardFidTable"); - if (uint32_t Size = - Tables.GuardFlags & - uint32_t(COFF::GuardFlags::CF_FUNCTION_TABLE_SIZE_MASK)) { - // The size mask gives the number of extra bytes in addition to the 4-byte - // RVA of each entry in the table. As of writing only a 1-byte extra flag - // has been defined. 
- Size = (Size >> 28) + 4; - printRVATable(Tables.GuardFidTableVA, Tables.GuardFidTableCount, Size, - PrintGuardFlags); - } else { - printRVATable(Tables.GuardFidTableVA, Tables.GuardFidTableCount, 4); - } + printRVATable(Tables.GuardFidTableVA, Tables.GuardFidTableCount, + 4 + Stride, PrintExtra); } if (Tables.GuardIatTableVA) { ListScope LS(W, "GuardIatTable"); - printRVATable(Tables.GuardIatTableVA, Tables.GuardIatTableCount, 4); + printRVATable(Tables.GuardIatTableVA, Tables.GuardIatTableCount, + 4 + Stride, PrintExtra); } if (Tables.GuardLJmpTableVA) { ListScope LS(W, "GuardLJmpTable"); - printRVATable(Tables.GuardLJmpTableVA, Tables.GuardLJmpTableCount, 4); + printRVATable(Tables.GuardLJmpTableVA, Tables.GuardLJmpTableCount, + 4 + Stride, PrintExtra); } if (Tables.GuardEHContTableVA) { ListScope LS(W, "GuardEHContTable"); - printRVATable(Tables.GuardEHContTableVA, Tables.GuardEHContTableCount, 5, - PrintGuardFlags); + printRVATable(Tables.GuardEHContTableVA, Tables.GuardEHContTableCount, + 4 + Stride, PrintExtra); } }