From 2d22c9abbd20fdc3d3a469075bd151ca1bd02ee4 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 26 Jul 2023 21:11:12 +0200 Subject: [PATCH 1/3] Vendor import of llvm-project branch release/17.x llvmorg-17-init-19311-gbc849e525f80. --- clang/lib/Basic/Targets/LoongArch.cpp | 25 +- clang/lib/Basic/Targets/LoongArch.h | 3 + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 23 +- clang/lib/Driver/ToolChains/Clang.cpp | 22 +- libcxx/include/__mdspan/extents.h | 63 ++-- libcxx/include/__mdspan/layout_left.h | 30 +- libcxx/include/__mdspan/layout_right.h | 30 +- libcxx/include/__mdspan/mdspan.h | 308 ++++++++++++++++++ libcxx/include/mdspan | 130 ++++++++ libcxx/include/module.modulemap.in | 1 + libcxx/modules/std/mdspan.cppm | 2 +- .../llvm/TargetParser/LoongArchTargetParser.h | 9 +- .../Transforms/IPO/FunctionSpecialization.h | 15 +- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 4 +- llvm/lib/Target/LoongArch/LoongArch.td | 5 + .../TargetParser/LoongArchTargetParser.cpp | 25 ++ .../Transforms/IPO/FunctionSpecialization.cpp | 82 +---- openmp/runtime/src/ompt-event-specific.h | 13 +- 18 files changed, 644 insertions(+), 146 deletions(-) create mode 100644 libcxx/include/__mdspan/mdspan.h diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index 6958479cd7c4..f08e5e732b03 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -15,7 +15,7 @@ #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TargetParser/TargetParser.h" +#include "llvm/TargetParser/LoongArchTargetParser.h" using namespace clang; using namespace clang::targets; @@ -198,7 +198,19 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, else Builder.defineMacro("__loongarch_frlen", "0"); - // TODO: define __loongarch_arch and __loongarch_tune. + // Define __loongarch_arch. + StringRef Arch = llvm::LoongArch::getArch(); + if (Arch.empty()) + Arch = llvm::LoongArch::getDefaultArch(GRLen == 64); + if (!Arch.empty()) + Builder.defineMacro("__loongarch_arch", Arch); + + // Define __loongarch_tune. 
+ StringRef TuneCPU = llvm::LoongArch::getTuneCPU(); + if (TuneCPU.empty()) + TuneCPU = Arch; + if (!TuneCPU.empty()) + Builder.defineMacro("__loongarch_tune", TuneCPU); StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") @@ -270,3 +282,12 @@ bool LoongArchTargetInfo::handleTargetFeatures( } return true; } + +bool LoongArchTargetInfo::isValidTuneCPUName(StringRef Name) const { + return llvm::LoongArch::isValidTuneCPUName(Name); +} + +void LoongArchTargetInfo::fillValidTuneCPUList( + SmallVectorImpl &Values) const { + llvm::LoongArch::fillValidTuneCPUList(Values); +} diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 52c4ce425368..60d545566b30 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -80,6 +80,9 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { const std::vector &FeaturesVec) const override; bool hasFeature(StringRef Feature) const override; + + bool isValidTuneCPUName(StringRef Name) const override; + void fillValidTuneCPUList(SmallVectorImpl &Values) const override; }; class LLVM_LIBRARY_VISIBILITY LoongArch32TargetInfo diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 856ad58f3bd9..6cbb06b9a91f 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -12,6 +12,7 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" +#include "llvm/TargetParser/Host.h" #include "llvm/TargetParser/LoongArchTargetParser.h" using namespace clang::driver; @@ -128,21 +129,29 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, std::vector &Features) { StringRef ArchName; if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { - if (!llvm::LoongArch::isValidArchName(A->getValue())) { + ArchName = A->getValue(); + + // Handle -march=native. + if (ArchName == "native") { + ArchName = llvm::sys::getHostCPUName(); + if (ArchName == "generic") + ArchName = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); + } + + if (!llvm::LoongArch::isValidArchName(ArchName)) { D.Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args); return; } - ArchName = A->getValue(); } - // TODO: handle -march=native and -mtune=xx. - // Select a default arch name. - if (ArchName.empty() && Triple.isLoongArch64()) - ArchName = "loongarch64"; + if (ArchName.empty()) + ArchName = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); - if (!ArchName.empty()) + if (!ArchName.empty()) { llvm::LoongArch::getArchFeatures(ArchName, Features); + llvm::LoongArch::setArch(ArchName); + } // Select floating-point features determined by -mdouble-float, // -msingle-float, -msoft-float and -mfpu. 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index adb550d9c5da..e3fa315ffcb1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/YAMLParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" #include "llvm/TargetParser/Host.h" +#include "llvm/TargetParser/LoongArchTargetParser.h" #include "llvm/TargetParser/RISCVTargetParser.h" #include @@ -1853,10 +1854,25 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, void Clang::AddLoongArchTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { + const llvm::Triple &Triple = getToolChain().getTriple(); + CmdArgs.push_back("-target-abi"); - CmdArgs.push_back(loongarch::getLoongArchABI(getToolChain().getDriver(), Args, - getToolChain().getTriple()) - .data()); + CmdArgs.push_back( + loongarch::getLoongArchABI(getToolChain().getDriver(), Args, Triple) + .data()); + + // Handle -mtune. + if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { + StringRef TuneCPU = A->getValue(); + if (TuneCPU == "native") { + TuneCPU = llvm::sys::getHostCPUName(); + if (TuneCPU == "generic") + TuneCPU = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); + } + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + llvm::LoongArch::setTuneCPU(TuneCPU); + } } void Clang::AddMIPSTargetArgs(const ArgList &Args, diff --git a/libcxx/include/__mdspan/extents.h b/libcxx/include/__mdspan/extents.h index 42355678d60c..a510220d4096 100644 --- a/libcxx/include/__mdspan/extents.h +++ b/libcxx/include/__mdspan/extents.h @@ -171,11 +171,14 @@ struct __maybe_static_array { _TStatic __static_val = _StaticValues::__get(__i); if (__static_val == _DynTag) { __dyn_vals_[_DynamicIdxMap::__get(__i)] = __values[__i]; - } - // Precondition check - else - _LIBCPP_ASSERT_UNCATEGORIZED(__values[__i] == static_cast<_TDynamic>(__static_val), - "extents construction: mismatch of provided arguments with static extents."); + } else + // Not catching this could lead to out of bounds errors later + // e.g. using my_mdspan_t = mdspan>; my_mdspan_t = m(new int[5], 5); + // Right-hand-side construction looks ok with allocation and size matching, + // but since (potentially elsewhere defined) my_mdspan_t has static size m now thinks its range is 10 not 5 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __values[__i] == static_cast<_TDynamic>(__static_val), + "extents construction: mismatch of provided arguments with static extents."); } } @@ -187,11 +190,14 @@ struct __maybe_static_array { _TStatic __static_val = _StaticValues::__get(__i); if (__static_val == _DynTag) { __dyn_vals_[_DynamicIdxMap::__get(__i)] = static_cast<_TDynamic>(__vals[__i]); - } - // Precondition check - else - _LIBCPP_ASSERT_UNCATEGORIZED(static_cast<_TDynamic>(__vals[__i]) == static_cast<_TDynamic>(__static_val), - "extents construction: mismatch of provided arguments with static extents."); + } else + // Not catching this could lead to out of bounds errors later + // e.g. 
using my_mdspan_t = mdspan>; my_mdspan_t = m(new int[N], span(&N)); + // Right-hand-side construction looks ok with allocation and size matching, + // but since (potentially elsewhere defined) my_mdspan_t has static size m now thinks its range is 10 not N + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + static_cast<_TDynamic>(__vals[__i]) == static_cast<_TDynamic>(__static_val), + "extents construction: mismatch of provided arguments with static extents."); } } @@ -310,28 +316,37 @@ class extents { (sizeof...(_OtherIndexTypes) == __rank_ || sizeof...(_OtherIndexTypes) == __rank_dynamic_)) _LIBCPP_HIDE_FROM_ABI constexpr explicit extents(_OtherIndexTypes... __dynvals) noexcept : __vals_(static_cast(__dynvals)...) { - _LIBCPP_ASSERT_UNCATEGORIZED(__mdspan_detail::__are_representable_as(__dynvals...), - "extents ctor: arguments must be representable as index_type and nonnegative"); + // Not catching this could lead to out of bounds errors later + // e.g. mdspan m(ptr, dextents(200u)); leads to an extent of -56 on m + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__mdspan_detail::__are_representable_as(__dynvals...), + "extents ctor: arguments must be representable as index_type and nonnegative"); } template - requires(is_convertible_v<_OtherIndexType, index_type> && is_nothrow_constructible_v && + requires(is_convertible_v && + is_nothrow_constructible_v && (_Size == __rank_ || _Size == __rank_dynamic_)) explicit(_Size != __rank_dynamic_) _LIBCPP_HIDE_FROM_ABI constexpr extents(const array<_OtherIndexType, _Size>& __exts) noexcept : __vals_(span(__exts)) { - _LIBCPP_ASSERT_UNCATEGORIZED(__mdspan_detail::__are_representable_as(span(__exts)), - "extents ctor: arguments must be representable as index_type and nonnegative"); + // Not catching this could lead to out of bounds errors later + // e.g. mdspan m(ptr, dextents(array(200))); leads to an extent of -56 on m + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__mdspan_detail::__are_representable_as(span(__exts)), + "extents ctor: arguments must be representable as index_type and nonnegative"); } template - requires(is_convertible_v<_OtherIndexType, index_type> && is_nothrow_constructible_v && + requires(is_convertible_v && + is_nothrow_constructible_v && (_Size == __rank_ || _Size == __rank_dynamic_)) explicit(_Size != __rank_dynamic_) _LIBCPP_HIDE_FROM_ABI constexpr extents(const span<_OtherIndexType, _Size>& __exts) noexcept : __vals_(__exts) { - _LIBCPP_ASSERT_UNCATEGORIZED(__mdspan_detail::__are_representable_as(__exts), - "extents ctor: arguments must be representable as index_type and nonnegative"); + // Not catching this could lead to out of bounds errors later + // e.g. array a{200u}; mdspan> m(ptr, extents(span(a))); leads to an extent of -56 + // on m + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__mdspan_detail::__are_representable_as(__exts), + "extents ctor: arguments must be representable as index_type and nonnegative"); } private: @@ -380,10 +395,16 @@ class extents { for (size_t __r = 0; __r < rank(); __r++) { if constexpr (static_cast>(numeric_limits::max()) < static_cast>(numeric_limits<_OtherIndexType>::max())) { - _LIBCPP_ASSERT_UNCATEGORIZED(__mdspan_detail::__is_representable_as(__other.extent(__r)), - "extents ctor: arguments must be representable as index_type and nonnegative"); + // Not catching this could lead to out of bounds errors later + // e.g. 
dextents> e(dextents(200)) leads to an extent of -56 on e + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __mdspan_detail::__is_representable_as(__other.extent(__r)), + "extents ctor: arguments must be representable as index_type and nonnegative"); } - _LIBCPP_ASSERT_UNCATEGORIZED( + // Not catching this could lead to out of bounds errors later + // e.g. mdspan> m = mdspan>(new int[5], 5); + // Right-hand-side construction was ok, but m now thinks its range is 10 not 5 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( (_Values::__static_value(__r) == dynamic_extent) || (static_cast(__other.extent(__r)) == static_cast(_Values::__static_value(__r))), "extents construction: mismatch of provided arguments with static extents."); diff --git a/libcxx/include/__mdspan/layout_left.h b/libcxx/include/__mdspan/layout_left.h index e81e0d10b595..f890c5ae0256 100644 --- a/libcxx/include/__mdspan/layout_left.h +++ b/libcxx/include/__mdspan/layout_left.h @@ -75,8 +75,11 @@ class layout_left::mapping { _LIBCPP_HIDE_FROM_ABI constexpr mapping() noexcept = default; _LIBCPP_HIDE_FROM_ABI constexpr mapping(const mapping&) noexcept = default; _LIBCPP_HIDE_FROM_ABI constexpr mapping(const extents_type& __ext) noexcept : __extents_(__ext) { - _LIBCPP_ASSERT(__required_span_size_is_representable(__ext), - "layout_left::mapping extents ctor: product of extents must be representable as index_type."); + // not catching this could lead to out-of-bounds access later when used inside mdspan + // mapping> map(dextents(40,40)); map(10, 3) == -126 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __required_span_size_is_representable(__ext), + "layout_left::mapping extents ctor: product of extents must be representable as index_type."); } template @@ -84,7 +87,9 @@ class layout_left::mapping { _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_OtherExtents, extents_type>) mapping(const mapping<_OtherExtents>& __other) noexcept : __extents_(__other.extents()) { - _LIBCPP_ASSERT( + // not catching this could lead to out-of-bounds access later when used inside mdspan + // mapping> map(mapping>(dextents(40,40))); map(10, 3) == -126 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __mdspan_detail::__is_representable_as(__other.required_span_size()), "layout_left::mapping converting ctor: other.required_span_size() must be representable as index_type."); } @@ -94,7 +99,13 @@ class layout_left::mapping { _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_OtherExtents, extents_type>) mapping(const layout_right::mapping<_OtherExtents>& __other) noexcept : __extents_(__other.extents()) { - _LIBCPP_ASSERT( + // not catching this could lead to out-of-bounds access later when used inside mdspan + // Note: since this is constraint to rank 1, extents itself would catch the invalid conversion first + // and thus this assertion should never be triggered, but keeping it here for consistency + // layout_left::mapping> map( + // layout_right::mapping>(dextents(200))); map.extents().extent(0) == + // -56 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __mdspan_detail::__is_representable_as(__other.required_span_size()), "layout_left::mapping converting ctor: other.required_span_size() must be representable as index_type."); } @@ -122,6 +133,10 @@ class layout_left::mapping { requires((sizeof...(_Indices) == extents_type::rank()) && (is_convertible_v<_Indices, index_type> && ...) && (is_nothrow_constructible_v && ...)) _LIBCPP_HIDE_FROM_ABI constexpr index_type operator()(_Indices... 
__idx) const noexcept { + // Mappings are generally meant to be used for accessing allocations and are meant to guarantee to never + // return a value exceeding required_span_size(), which is used to know how large an allocation one needs + // Thus, this is a canonical point in multi-dimensional data structures to make invalid element access checks + // However, mdspan does check this on its own, so for now we avoid double checking in hardened mode _LIBCPP_ASSERT(__mdspan_detail::__is_multidimensional_index_in(__extents_, __idx...), "layout_left::mapping: out of bounds indexing"); array __idx_a{static_cast(__idx)...}; @@ -144,7 +159,10 @@ class layout_left::mapping { _LIBCPP_HIDE_FROM_ABI constexpr index_type stride(rank_type __r) const noexcept requires(extents_type::rank() > 0) { - _LIBCPP_ASSERT(__r < extents_type::rank(), "layout_left::mapping::stride(): invalid rank index"); + // While it would be caught by extents itself too, using a too large __r + // is functionally an out of bounds access on the stored information needed to compute strides + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __r < extents_type::rank(), "layout_left::mapping::stride(): invalid rank index"); index_type __s = 1; for (rank_type __i = extents_type::rank() - 1; __i > __r; __i--) __s *= __extents_.extent(__i); @@ -159,7 +177,7 @@ class layout_left::mapping { } private: - extents_type __extents_{}; // exposition only + _LIBCPP_NO_UNIQUE_ADDRESS extents_type __extents_{}; }; #endif // _LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__mdspan/layout_right.h b/libcxx/include/__mdspan/layout_right.h index a8a91b86c714..3d814554a1be 100644 --- a/libcxx/include/__mdspan/layout_right.h +++ b/libcxx/include/__mdspan/layout_right.h @@ -74,8 +74,11 @@ class layout_right::mapping { _LIBCPP_HIDE_FROM_ABI constexpr mapping() noexcept = default; _LIBCPP_HIDE_FROM_ABI constexpr mapping(const mapping&) noexcept = default; _LIBCPP_HIDE_FROM_ABI constexpr mapping(const extents_type& __ext) noexcept : __extents_(__ext) { - _LIBCPP_ASSERT(__required_span_size_is_representable(__ext), - "layout_right::mapping extents ctor: product of extents must be representable as index_type."); + // not catching this could lead to out-of-bounds access later when used inside mdspan + // mapping> map(dextents(40,40)); map(3, 10) == -126 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __required_span_size_is_representable(__ext), + "layout_right::mapping extents ctor: product of extents must be representable as index_type."); } template @@ -83,7 +86,9 @@ class layout_right::mapping { _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_OtherExtents, extents_type>) mapping(const mapping<_OtherExtents>& __other) noexcept : __extents_(__other.extents()) { - _LIBCPP_ASSERT( + // not catching this could lead to out-of-bounds access later when used inside mdspan + // mapping> map(mapping>(dextents(40,40))); map(3, 10) == -126 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __mdspan_detail::__is_representable_as(__other.required_span_size()), "layout_right::mapping converting ctor: other.required_span_size() must be representable as index_type."); } @@ -93,7 +98,13 @@ class layout_right::mapping { _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_OtherExtents, extents_type>) mapping(const layout_left::mapping<_OtherExtents>& __other) noexcept : __extents_(__other.extents()) { - _LIBCPP_ASSERT( + // not catching this could lead to out-of-bounds access later when used inside mdspan + // Note: since this is constraint to rank 1, extents itself would catch the 
invalid conversion first + // and thus this assertion should never be triggered, but keeping it here for consistency + // layout_right::mapping> map( + // layout_left::mapping>(dextents(200))); map.extents().extent(0) == + // -56 + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __mdspan_detail::__is_representable_as(__other.required_span_size()), "layout_right::mapping converting ctor: other.required_span_size() must be representable as index_type."); } @@ -121,6 +132,10 @@ class layout_right::mapping { requires((sizeof...(_Indices) == extents_type::rank()) && (is_convertible_v<_Indices, index_type> && ...) && (is_nothrow_constructible_v && ...)) _LIBCPP_HIDE_FROM_ABI constexpr index_type operator()(_Indices... __idx) const noexcept { + // Mappings are generally meant to be used for accessing allocations and are meant to guarantee to never + // return a value exceeding required_span_size(), which is used to know how large an allocation one needs + // Thus, this is a canonical point in multi-dimensional data structures to make invalid element access checks + // However, mdspan does check this on its own, so for now we avoid double checking in hardened mode _LIBCPP_ASSERT(__mdspan_detail::__is_multidimensional_index_in(__extents_, __idx...), "layout_right::mapping: out of bounds indexing"); return [&](index_sequence<_Pos...>) { @@ -141,7 +156,10 @@ class layout_right::mapping { _LIBCPP_HIDE_FROM_ABI constexpr index_type stride(rank_type __r) const noexcept requires(extents_type::rank() > 0) { - _LIBCPP_ASSERT(__r < extents_type::rank(), "layout_right::mapping::stride(): invalid rank index"); + // While it would be caught by extents itself too, using a too large __r + // is functionally an out of bounds access on the stored information needed to compute strides + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __r < extents_type::rank(), "layout_right::mapping::stride(): invalid rank index"); index_type __s = 1; for (rank_type __i = extents_type::rank() - 1; __i > __r; __i--) __s *= __extents_.extent(__i); @@ -156,7 +174,7 @@ class layout_right::mapping { } private: - extents_type __extents_{}; // exposition only + _LIBCPP_NO_UNIQUE_ADDRESS extents_type __extents_{}; }; #endif // _LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__mdspan/mdspan.h b/libcxx/include/__mdspan/mdspan.h new file mode 100644 index 000000000000..58f3b9cf1b18 --- /dev/null +++ b/libcxx/include/__mdspan/mdspan.h @@ -0,0 +1,308 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___MDSPAN_MDSPAN_H +#define _LIBCPP___MDSPAN_MDSPAN_H + +#include <__assert> +#include <__config> +#include <__fwd/mdspan.h> +#include <__mdspan/default_accessor.h> +#include <__mdspan/extents.h> +#include <__type_traits/extent.h> +#include <__type_traits/is_abstract.h> +#include <__type_traits/is_array.h> +#include <__type_traits/is_constructible.h> +#include <__type_traits/is_convertible.h> +#include <__type_traits/is_default_constructible.h> +#include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_pointer.h> +#include <__type_traits/is_same.h> +#include <__type_traits/rank.h> +#include <__type_traits/remove_all_extents.h> +#include <__type_traits/remove_cv.h> +#include <__type_traits/remove_pointer.h> +#include <__type_traits/remove_reference.h> +#include <__utility/integer_sequence.h> +#include +#include +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +// Helper for lightweight test checking that one did pass a layout policy as LayoutPolicy template argument +namespace __mdspan_detail { +template +concept __has_invalid_mapping = !requires { typename _Layout::template mapping<_Extents>; }; +} // namespace __mdspan_detail + +template > +class mdspan { +private: + static_assert(__mdspan_detail::__is_extents_v<_Extents>, + "mdspan: Extents template parameter must be a specialization of extents."); + static_assert(!is_array_v<_ElementType>, "mdspan: ElementType template parameter may not be an array type"); + static_assert(!is_abstract_v<_ElementType>, "mdspan: ElementType template parameter may not be an abstract class"); + static_assert(is_same_v<_ElementType, typename _AccessorPolicy::element_type>, + "mdspan: ElementType template parameter must match AccessorPolicy::element_type"); + static_assert(!__mdspan_detail::__has_invalid_mapping<_LayoutPolicy, _Extents>, + "mdspan: LayoutPolicy template parameter is invalid. 
A common mistake is to pass a layout mapping " + "instead of a layout policy"); + +public: + using extents_type = _Extents; + using layout_type = _LayoutPolicy; + using accessor_type = _AccessorPolicy; + using mapping_type = typename layout_type::template mapping; + using element_type = _ElementType; + using value_type = remove_cv_t; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using data_handle_type = typename accessor_type::data_handle_type; + using reference = typename accessor_type::reference; + + _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return extents_type::rank(); } + _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { return extents_type::rank_dynamic(); } + _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept { + return extents_type::static_extent(__r); + } + _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept { + return __map_.extents().extent(__r); + }; + +public: + //-------------------------------------------------------------------------------- + // [mdspan.mdspan.cons], mdspan constructors, assignment, and destructor + + _LIBCPP_HIDE_FROM_ABI constexpr mdspan() + requires((extents_type::rank_dynamic() > 0) && is_default_constructible_v && + is_default_constructible_v && is_default_constructible_v) + = default; + _LIBCPP_HIDE_FROM_ABI constexpr mdspan(const mdspan&) = default; + _LIBCPP_HIDE_FROM_ABI constexpr mdspan(mdspan&&) = default; + + template + requires((is_convertible_v<_OtherIndexTypes, index_type> && ...) && + (is_nothrow_constructible_v && ...) && + ((sizeof...(_OtherIndexTypes) == rank()) || (sizeof...(_OtherIndexTypes) == rank_dynamic())) && + is_constructible_v && is_default_constructible_v) + _LIBCPP_HIDE_FROM_ABI explicit constexpr mdspan(data_handle_type __p, _OtherIndexTypes... 
__exts) + : __ptr_(std::move(__p)), __map_(extents_type(static_cast(std::move(__exts))...)), __acc_{} {} + + template + requires(is_convertible_v && + is_nothrow_constructible_v && + ((_Size == rank()) || (_Size == rank_dynamic())) && is_constructible_v && + is_default_constructible_v) + explicit(_Size != rank_dynamic()) + _LIBCPP_HIDE_FROM_ABI constexpr mdspan(data_handle_type __p, const array<_OtherIndexType, _Size>& __exts) + : __ptr_(std::move(__p)), __map_(extents_type(__exts)), __acc_{} {} + + template + requires(is_convertible_v && + is_nothrow_constructible_v && + ((_Size == rank()) || (_Size == rank_dynamic())) && is_constructible_v && + is_default_constructible_v) + explicit(_Size != rank_dynamic()) + _LIBCPP_HIDE_FROM_ABI constexpr mdspan(data_handle_type __p, span<_OtherIndexType, _Size> __exts) + : __ptr_(std::move(__p)), __map_(extents_type(__exts)), __acc_{} {} + + _LIBCPP_HIDE_FROM_ABI constexpr mdspan(data_handle_type __p, const extents_type& __exts) + requires(is_default_constructible_v && is_constructible_v) + : __ptr_(std::move(__p)), __map_(__exts), __acc_{} {} + + _LIBCPP_HIDE_FROM_ABI constexpr mdspan(data_handle_type __p, const mapping_type& __m) + requires(is_default_constructible_v) + : __ptr_(std::move(__p)), __map_(__m), __acc_{} {} + + _LIBCPP_HIDE_FROM_ABI constexpr mdspan(data_handle_type __p, const mapping_type& __m, const accessor_type& __a) + : __ptr_(std::move(__p)), __map_(__m), __acc_(__a) {} + + template + requires(is_constructible_v&> && + is_constructible_v) + explicit(!is_convertible_v&, mapping_type> || + !is_convertible_v) + _LIBCPP_HIDE_FROM_ABI constexpr mdspan( + const mdspan<_OtherElementType, _OtherExtents, _OtherLayoutPolicy, _OtherAccessor>& __other) + : __ptr_(__other.__ptr_), __map_(__other.__map_), __acc_(__other.__acc_) { + static_assert(is_constructible_v, + "mdspan: incompatible data_handle_type for mdspan construction"); + static_assert( + is_constructible_v, "mdspan: incompatible extents for mdspan construction"); + + // The following precondition is part of the standard, but is unlikely to be triggered. + // The extents constructor checks this and the mapping must be storing the extents, since + // its extents() function returns a const reference to extents_type. + // The only way this can be triggered is if the mapping conversion constructor would for example + // always construct its extents() only from the dynamic extents, instead of from the other extents. + if constexpr (rank() > 0) { + for (size_t __r = 0; __r < rank(); __r++) { + // Not catching this could lead to out of bounds errors later + // e.g. mdspan, non_checking_layout> m = + // mdspan, non_checking_layout>(ptr, 200); leads to an extent of -56 on m + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + (static_extent(__r) == dynamic_extent) || + (static_cast(__other.extent(__r)) == static_cast(static_extent(__r))), + "mdspan: conversion mismatch of source dynamic extents with static extents"); + } + } + } + + _LIBCPP_HIDE_FROM_ABI constexpr mdspan& operator=(const mdspan&) = default; + _LIBCPP_HIDE_FROM_ABI constexpr mdspan& operator=(mdspan&&) = default; + + //-------------------------------------------------------------------------------- + // [mdspan.mdspan.members], members + + template + requires((is_convertible_v<_OtherIndexTypes, index_type> && ...) && + (is_nothrow_constructible_v && ...) && + (sizeof...(_OtherIndexTypes) == rank())) + _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](_OtherIndexTypes... 
__indices) const { + // Note the standard layouts would also check this, but user provided ones may not, so we + // check the precondition here + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__mdspan_detail::__is_multidimensional_index_in(extents(), __indices...), + "mdspan: operator[] out of bounds access"); + return __acc_.access(__ptr_, __map_(static_cast(std::move(__indices))...)); + } + + template + requires(is_convertible_v && + is_nothrow_constructible_v) + _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](const array< _OtherIndexType, rank()>& __indices) const { + return __acc_.access(__ptr_, [&](index_sequence<_Idxs...>) { + return __map_(__indices[_Idxs]...); + }(make_index_sequence())); + } + + template + requires(is_convertible_v && + is_nothrow_constructible_v) + _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](span<_OtherIndexType, rank()> __indices) const { + return __acc_.access(__ptr_, [&](index_sequence<_Idxs...>) { + return __map_(__indices[_Idxs]...); + }(make_index_sequence())); + } + + _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { + // Could leave this as only checked in debug mode: semantically size() is never + // guaranteed to be related to any accessible range + _LIBCPP_ASSERT_UNCATEGORIZED( + false == ([&](index_sequence<_Idxs...>) { + size_type __prod = 1; + return (__builtin_mul_overflow(__prod, extent(_Idxs), &__prod) || ... || false); + }(make_index_sequence())), + "mdspan: size() is not representable as size_type"); + return [&](index_sequence<_Idxs...>) { + return ((static_cast(__map_.extents().extent(_Idxs))) * ... * size_type(1)); + }(make_index_sequence()); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const noexcept { + return [&](index_sequence<_Idxs...>) { + return (rank() > 0) && ((__map_.extents().extent(_Idxs) == index_type(0)) || ... || false); + }(make_index_sequence()); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr void swap(mdspan& __x, mdspan& __y) noexcept { + swap(__x.__ptr_, __y.__ptr_); + swap(__x.__map_, __y.__map_); + swap(__x.__acc_, __y.__acc_); + } + + _LIBCPP_HIDE_FROM_ABI constexpr const extents_type& extents() const noexcept { return __map_.extents(); }; + _LIBCPP_HIDE_FROM_ABI constexpr const data_handle_type& data_handle() const noexcept { return __ptr_; }; + _LIBCPP_HIDE_FROM_ABI constexpr const mapping_type& mapping() const noexcept { return __map_; }; + _LIBCPP_HIDE_FROM_ABI constexpr const accessor_type& accessor() const noexcept { return __acc_; }; + + _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_unique() { return mapping_type::is_always_unique(); }; + _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_exhaustive() { return mapping_type::is_always_exhaustive(); }; + _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_strided() { return mapping_type::is_always_strided(); }; + + _LIBCPP_HIDE_FROM_ABI constexpr bool is_unique() const { return __map_.is_unique(); }; + _LIBCPP_HIDE_FROM_ABI constexpr bool is_exhaustive() const { return __map_.is_exhaustive(); }; + _LIBCPP_HIDE_FROM_ABI constexpr bool is_strided() const { return __map_.is_strided(); }; + _LIBCPP_HIDE_FROM_ABI constexpr index_type stride(rank_type __r) const { return __map_.stride(__r); }; + +private: + _LIBCPP_NO_UNIQUE_ADDRESS data_handle_type __ptr_{}; + _LIBCPP_NO_UNIQUE_ADDRESS mapping_type __map_{}; + _LIBCPP_NO_UNIQUE_ADDRESS accessor_type __acc_{}; + + template + friend class mdspan; +}; + +template + requires((is_convertible_v<_OtherIndexTypes, size_t> && ...) 
&& (sizeof...(_OtherIndexTypes) > 0)) +explicit mdspan(_ElementType*, _OtherIndexTypes...) + -> mdspan<_ElementType, dextents>; + +template + requires(is_pointer_v>) +mdspan(_Pointer&&) -> mdspan>, extents>; + +template + requires(is_array_v<_CArray> && (rank_v<_CArray> == 1)) +mdspan(_CArray&) -> mdspan, extents>>; + +template +mdspan(_ElementType*, const array<_OtherIndexType, _Size>&) -> mdspan<_ElementType, dextents>; + +template +mdspan(_ElementType*, span<_OtherIndexType, _Size>) -> mdspan<_ElementType, dextents>; + +// This one is necessary because all the constructors take `data_handle_type`s, not +// `_ElementType*`s, and `data_handle_type` is taken from `accessor_type::data_handle_type`, which +// seems to throw off automatic deduction guides. +template +mdspan(_ElementType*, const extents<_OtherIndexType, _ExtentsPack...>&) + -> mdspan<_ElementType, extents<_OtherIndexType, _ExtentsPack...>>; + +template +mdspan(_ElementType*, const _MappingType&) + -> mdspan<_ElementType, typename _MappingType::extents_type, typename _MappingType::layout_type>; + +template +mdspan(const typename _AccessorType::data_handle_type, const _MappingType&, const _AccessorType&) + -> mdspan; + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___MDSPAN_MDSPAN_H diff --git a/libcxx/include/mdspan b/libcxx/include/mdspan index 701def50b40a..9082eb8bdb55 100644 --- a/libcxx/include/mdspan +++ b/libcxx/include/mdspan @@ -208,6 +208,135 @@ namespace std { }; } +// mdspan synopsis + +namespace std { + template> + class mdspan { + public: + using extents_type = Extents; + using layout_type = LayoutPolicy; + using accessor_type = AccessorPolicy; + using mapping_type = typename layout_type::template mapping; + using element_type = ElementType; + using value_type = remove_cv_t; + using index_type = typename extents_type::index_type; + using size_type = typename extents_type::size_type; + using rank_type = typename extents_type::rank_type; + using data_handle_type = typename accessor_type::data_handle_type; + using reference = typename accessor_type::reference; + + static constexpr rank_type rank() noexcept { return extents_type::rank(); } + static constexpr rank_type rank_dynamic() noexcept { return extents_type::rank_dynamic(); } + static constexpr size_t static_extent(rank_type r) noexcept + { return extents_type::static_extent(r); } + constexpr index_type extent(rank_type r) const noexcept { return extents().extent(r); } + + // [mdspan.mdspan.cons], constructors + constexpr mdspan(); + constexpr mdspan(const mdspan& rhs) = default; + constexpr mdspan(mdspan&& rhs) = default; + + template + constexpr explicit mdspan(data_handle_type ptr, OtherIndexTypes... exts); + template + constexpr explicit(N != rank_dynamic()) + mdspan(data_handle_type p, span exts); + template + constexpr explicit(N != rank_dynamic()) + mdspan(data_handle_type p, const array& exts); + constexpr mdspan(data_handle_type p, const extents_type& ext); + constexpr mdspan(data_handle_type p, const mapping_type& m); + constexpr mdspan(data_handle_type p, const mapping_type& m, const accessor_type& a); + + template + constexpr explicit(see below) + mdspan(const mdspan& other); + + constexpr mdspan& operator=(const mdspan& rhs) = default; + constexpr mdspan& operator=(mdspan&& rhs) = default; + + // [mdspan.mdspan.members], members + template + constexpr reference operator[](OtherIndexTypes... 
indices) const; + template + constexpr reference operator[](span indices) const; + template + constexpr reference operator[](const array& indices) const; + + constexpr size_type size() const noexcept; + [[nodiscard]] constexpr bool empty() const noexcept; + + friend constexpr void swap(mdspan& x, mdspan& y) noexcept; + + constexpr const extents_type& extents() const noexcept { return map_.extents(); } + constexpr const data_handle_type& data_handle() const noexcept { return ptr_; } + constexpr const mapping_type& mapping() const noexcept { return map_; } + constexpr const accessor_type& accessor() const noexcept { return acc_; } + + static constexpr bool is_always_unique() + { return mapping_type::is_always_unique(); } + static constexpr bool is_always_exhaustive() + { return mapping_type::is_always_exhaustive(); } + static constexpr bool is_always_strided() + { return mapping_type::is_always_strided(); } + + constexpr bool is_unique() const + { return map_.is_unique(); } + constexpr bool is_exhaustive() const + { return map_.is_exhaustive(); } + constexpr bool is_strided() const + { return map_.is_strided(); } + constexpr index_type stride(rank_type r) const + { return map_.stride(r); } + + private: + accessor_type acc_; // exposition only + mapping_type map_; // exposition only + data_handle_type ptr_; // exposition only + }; + + template + requires(is_array_v && rank_v == 1) + mdspan(CArray&) + -> mdspan, extents>>; + + template + requires(is_pointer_v>) + mdspan(Pointer&&) + -> mdspan>, extents>; + + template + requires((is_convertible_v && ...) && sizeof...(Integrals) > 0) + explicit mdspan(ElementType*, Integrals...) + -> mdspan>; + + template + mdspan(ElementType*, span) + -> mdspan>; + + template + mdspan(ElementType*, const array&) + -> mdspan>; + + template + mdspan(ElementType*, const extents&) + -> mdspan>; + + template + mdspan(ElementType*, const MappingType&) + -> mdspan; + + template + mdspan(const typename AccessorType::data_handle_type&, const MappingType&, + const AccessorType&) + -> mdspan; +} */ #ifndef _LIBCPP_MDSPAN @@ -219,6 +348,7 @@ namespace std { #include <__mdspan/extents.h> #include <__mdspan/layout_left.h> #include <__mdspan/layout_right.h> +#include <__mdspan/mdspan.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 9ff8b67a6a20..0b418d2b7897 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -1516,6 +1516,7 @@ module std_private_mdspan_extents [system] { } module std_private_mdspan_layout_left [system] { header "__mdspan/layout_left.h" } module std_private_mdspan_layout_right [system] { header "__mdspan/layout_right.h" } +module std_private_mdspan_mdspan [system] { header "__mdspan/mdspan.h" } module std_private_mdspan_mdspan_fwd [system] { header "__fwd/mdspan.h" } module std_private_memory_addressof [system] { header "__memory/addressof.h" } diff --git a/libcxx/modules/std/mdspan.cppm b/libcxx/modules/std/mdspan.cppm index 5023dfb925ea..d92024d9a77a 100644 --- a/libcxx/modules/std/mdspan.cppm +++ b/libcxx/modules/std/mdspan.cppm @@ -27,5 +27,5 @@ export namespace std { using std::default_accessor; // [mdspan.mdspan], class template mdspan - // using std::mdspan; + using std::mdspan; } // namespace std diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h index 2aa65ec070ec..82ab064211d7 100644 --- 
a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -66,9 +66,16 @@ struct ArchInfo { bool isValidArchName(StringRef Arch); bool getArchFeatures(StringRef Arch, std::vector &Features); +bool isValidTuneCPUName(StringRef TuneCPU); +void fillValidTuneCPUList(SmallVectorImpl &Values); +StringRef getDefaultArch(bool Is64Bit); +void setArch(StringRef Arch); +StringRef getArch(); +void setTuneCPU(StringRef TuneCPU); +StringRef getTuneCPU(); } // namespace LoongArch } // namespace llvm -#endif // LLVM_SUPPORT_LOONGARCHTARGETPARSER_H +#endif // LLVM_TARGETPARSER_LOONGARCHTARGETPARSER_H diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 4e78d9db024c..f780385f7f67 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -126,15 +126,6 @@ class InstCostVisitor : public InstVisitor { SCCPSolver &Solver; ConstMap KnownConstants; - // Basic blocks known to be unreachable after constant propagation. - DenseSet DeadBlocks; - // PHI nodes we have visited before. - DenseSet VisitedPHIs; - // PHI nodes we have visited once without successfully constant folding them. - // Once the InstCostVisitor has processed all the specialization arguments, - // it should be possible to determine whether those PHIs can be folded - // (some of their incoming values may have become constant or dead). - SmallVector PendingPHIs; ConstMap::iterator LastVisited; @@ -143,10 +134,7 @@ class InstCostVisitor : public InstVisitor { TargetTransformInfo &TTI, SCCPSolver &Solver) : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {} - Cost getUserBonus(Instruction *User, Value *Use = nullptr, - Constant *C = nullptr); - - Cost getBonusFromPendingPHIs(); + Cost getUserBonus(Instruction *User, Value *Use, Constant *C); private: friend class InstVisitor; @@ -155,7 +143,6 @@ class InstCostVisitor : public InstVisitor { Cost estimateBranchInst(BranchInst &I); Constant *visitInstruction(Instruction &I) { return nullptr; } - Constant *visitPHINode(PHINode &I); Constant *visitFreezeInst(FreezeInst &I); Constant *visitCallBase(CallBase &I); Constant *visitLoadInst(LoadInst &I); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3994552884c4..647f570ab807 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -2628,12 +2628,12 @@ MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol( // function entry point csect instead. And for function delcarations, the // undefined symbols gets treated as csect with XTY_ER property. if (((TM.getFunctionSections() && !Func->hasSection()) || - Func->isDeclaration()) && + Func->isDeclarationForLinker()) && isa(Func)) { return getContext() .getXCOFFSection( NameStr, SectionKind::getText(), - XCOFF::CsectProperties(XCOFF::XMC_PR, Func->isDeclaration() + XCOFF::CsectProperties(XCOFF::XMC_PR, Func->isDeclarationForLinker() ? 
XCOFF::XTY_ER : XCOFF::XTY_SD)) ->getQualNameSymbol(); diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index 7241a5d63526..0675caa3b601 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -117,6 +117,11 @@ include "LoongArchInstrInfo.td" def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; +// Generic 64-bit processor with double-precision floating-point support. +def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit, + FeatureUAL, + FeatureBasicD]>; + // Support generic for compatibility with other targets. The triple will be used // to change to the appropriate la32/la64 version. def : ProcessorModel<"generic", NoSchedModel, []>; diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp index 18b04600dbc6..72781513ff12 100644 --- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -16,6 +16,9 @@ using namespace llvm; using namespace llvm::LoongArch; +StringRef Arch; +StringRef TuneCPU; + const FeatureInfo AllFeatures[] = { #define LOONGARCH_FEATURE(NAME, KIND) {NAME, KIND}, #include "llvm/TargetParser/LoongArchTargetParser.def" @@ -46,3 +49,25 @@ bool LoongArch::getArchFeatures(StringRef Arch, } return false; } + +bool LoongArch::isValidTuneCPUName(StringRef TuneCPU) { + return isValidArchName(TuneCPU); +} + +void LoongArch::fillValidTuneCPUList(SmallVectorImpl &Values) { + for (const auto A : AllArchs) + Values.emplace_back(A.Name); +} + +StringRef LoongArch::getDefaultArch(bool Is64Bit) { + // TODO: use a real 32-bit arch name. + return Is64Bit ? "loongarch64" : ""; +} + +void LoongArch::setArch(StringRef Name) { Arch = Name; } + +StringRef LoongArch::getArch() { return Arch; } + +void LoongArch::setTuneCPU(StringRef Name) { TuneCPU = Name; } + +StringRef LoongArch::getTuneCPU() { return TuneCPU; } diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 3d6c501e4596..ac5dbc7cfb2a 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -78,11 +78,6 @@ static cl::opt MaxClones( "The maximum number of clones allowed for a single function " "specialization")); -static cl::opt MaxIncomingPhiValues( - "funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc( - "The maximum number of incoming values a PHI node can have to be " - "considered during the specialization bonus estimation")); - static cl::opt MinFunctionSize( "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc( "Don't specialize functions that have less than this number of " @@ -109,7 +104,6 @@ static cl::opt SpecializeLiteralConstant( // the combination of size and latency savings in comparison to the non // specialized version of the function. static Cost estimateBasicBlocks(SmallVectorImpl &WorkList, - DenseSet &DeadBlocks, ConstMap &KnownConstants, SCCPSolver &Solver, BlockFrequencyInfo &BFI, TargetTransformInfo &TTI) { @@ -124,12 +118,6 @@ static Cost estimateBasicBlocks(SmallVectorImpl &WorkList, if (!Weight) continue; - // These blocks are considered dead as far as the InstCostVisitor is - // concerned. They haven't been proven dead yet by the Solver, but - // may become if we propagate the constant specialization arguments. 
- if (!DeadBlocks.insert(BB).second) - continue; - for (Instruction &I : *BB) { // Disregard SSA copies. if (auto *II = dyn_cast(&I)) @@ -164,19 +152,9 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) { return nullptr; } -Cost InstCostVisitor::getBonusFromPendingPHIs() { - Cost Bonus = 0; - while (!PendingPHIs.empty()) { - Instruction *Phi = PendingPHIs.pop_back_val(); - Bonus += getUserBonus(Phi); - } - return Bonus; -} - Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { // Cache the iterator before visiting. - LastVisited = Use ? KnownConstants.insert({Use, C}).first - : KnownConstants.end(); + LastVisited = KnownConstants.insert({Use, C}).first; if (auto *I = dyn_cast(User)) return estimateSwitchInst(*I); @@ -203,15 +181,13 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { for (auto *U : User->users()) if (auto *UI = dyn_cast(U)) - if (UI != User && Solver.isBlockExecutable(UI->getParent())) + if (Solver.isBlockExecutable(UI->getParent())) Bonus += getUserBonus(UI, User, C); return Bonus; } Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (I.getCondition() != LastVisited->first) return 0; @@ -232,13 +208,10 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { WorkList.push_back(BB); } - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); + return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI); } Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (I.getCondition() != LastVisited->first) return 0; @@ -250,39 +223,10 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { Succ->getUniquePredecessor() == I.getParent()) WorkList.push_back(Succ); - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); -} - -Constant *InstCostVisitor::visitPHINode(PHINode &I) { - if (I.getNumIncomingValues() > MaxIncomingPhiValues) - return nullptr; - - bool Inserted = VisitedPHIs.insert(&I).second; - Constant *Const = nullptr; - - for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) { - Value *V = I.getIncomingValue(Idx); - if (auto *Inst = dyn_cast(V)) - if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx))) - continue; - Constant *C = findConstantFor(V, KnownConstants); - if (!C) { - if (Inserted) - PendingPHIs.push_back(&I); - return nullptr; - } - if (!Const) - Const = C; - else if (C != Const) - return nullptr; - } - return Const; + return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI); } Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second)) return LastVisited->second; return nullptr; @@ -309,8 +253,6 @@ Constant *InstCostVisitor::visitCallBase(CallBase &I) { } Constant *InstCostVisitor::visitLoadInst(LoadInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (isa(LastVisited->second)) return nullptr; return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL); @@ -333,8 +275,6 @@ Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { } Constant *InstCostVisitor::visitSelectInst(SelectInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - if (I.getCondition() != LastVisited->first) return 
nullptr; @@ -350,8 +290,6 @@ Constant *InstCostVisitor::visitCastInst(CastInst &I) { } Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - bool Swap = I.getOperand(1) == LastVisited->first; Value *V = Swap ? I.getOperand(0) : I.getOperand(1); Constant *Other = findConstantFor(V, KnownConstants); @@ -365,14 +303,10 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { } Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL); } Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) { - assert(LastVisited != KnownConstants.end() && "Invalid iterator!"); - bool Swap = I.getOperand(1) == LastVisited->first; Value *V = Swap ? I.getOperand(0) : I.getOperand(1); Constant *Other = findConstantFor(V, KnownConstants); @@ -779,17 +713,13 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost, AllSpecs[Index].CallSites.push_back(&CS); } else { // Calculate the specialisation gain. - Cost Score = 0; + Cost Score = 0 - SpecCost; InstCostVisitor Visitor = getInstCostVisitorFor(F); for (ArgInfo &A : S.Args) Score += getSpecializationBonus(A.Formal, A.Actual, Visitor); - Score += Visitor.getBonusFromPendingPHIs(); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = " - << Score << "\n"); // Discard unprofitable specialisations. - if (!ForceSpecialization && Score <= SpecCost) + if (!ForceSpecialization && Score <= 0) continue; // Create a new specialisation entry. diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h index 5ac7f6d1e4e6..7736ba853163 100644 --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -55,13 +55,12 @@ #define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_emi_implemented ompt_event_UNIMPLEMENTED - +#define ompt_callback_target_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_emi_implemented ompt_event_MAY_ALWAYS #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS #define ompt_callback_device_initialize_implemented ompt_event_MAY_ALWAYS From c938c0a643200ec844981864ac587bc6c1f576aa Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 2 Sep 2023 16:29:16 +0200 Subject: [PATCH 2/3] Vendor import of llvm-project branch release/17.x llvmorg-17.0.0-rc3-79-ga612cb0b81d8. 
--- clang/include/clang/AST/DeclBase.h | 6 +- clang/include/clang/Basic/CodeGenOptions.def | 1 - .../include/clang/Basic/DiagnosticASTKinds.td | 2 + clang/include/clang/Basic/DiagnosticGroups.td | 1 + .../include/clang/Basic/DiagnosticLexKinds.td | 4 + clang/include/clang/Basic/Sanitizers.h | 4 + clang/include/clang/Basic/TargetInfo.h | 4 +- clang/include/clang/Basic/riscv_vector.td | 52 +--- clang/include/clang/CodeGen/CGFunctionInfo.h | 29 +- clang/include/clang/Driver/Options.td | 14 +- clang/include/clang/Driver/ToolChain.h | 2 +- clang/include/clang/Sema/Sema.h | 2 - clang/lib/AST/ASTContext.cpp | 5 +- clang/lib/AST/ExprConstant.cpp | 27 +- clang/lib/Basic/Targets/LoongArch.cpp | 22 +- clang/lib/Basic/Targets/LoongArch.h | 14 +- clang/lib/Basic/Targets/RISCV.cpp | 4 +- clang/lib/CodeGen/ABIInfoImpl.cpp | 13 +- clang/lib/CodeGen/ABIInfoImpl.h | 14 +- clang/lib/CodeGen/BackendUtil.cpp | 23 +- clang/lib/CodeGen/CGCXXABI.cpp | 3 +- clang/lib/CodeGen/CGCall.cpp | 244 ++++++++++------- clang/lib/CodeGen/CGCall.h | 29 ++ clang/lib/CodeGen/CGClass.cpp | 106 +++++++- clang/lib/CodeGen/CGCoroutine.cpp | 33 +++ clang/lib/CodeGen/CGDebugInfo.cpp | 13 +- clang/lib/CodeGen/CGDebugInfo.h | 2 +- clang/lib/CodeGen/CGDecl.cpp | 2 +- clang/lib/CodeGen/CGDeclCXX.cpp | 4 +- clang/lib/CodeGen/CGExpr.cpp | 9 +- clang/lib/CodeGen/CGExprConstant.cpp | 2 +- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 11 +- clang/lib/CodeGen/CodeGenABITypes.cpp | 5 +- clang/lib/CodeGen/CodeGenFunction.cpp | 26 +- clang/lib/CodeGen/CodeGenFunction.h | 19 +- clang/lib/CodeGen/CodeGenModule.cpp | 32 ++- clang/lib/CodeGen/CodeGenModule.h | 20 +- clang/lib/CodeGen/CodeGenTypes.h | 12 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 2 +- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 3 +- clang/lib/CodeGen/Targets/LoongArch.cpp | 11 +- clang/lib/CodeGen/Targets/RISCV.cpp | 24 +- clang/lib/CodeGen/Targets/X86.cpp | 16 +- clang/lib/Driver/Driver.cpp | 9 +- clang/lib/Driver/SanitizerArgs.cpp | 32 +++ clang/lib/Driver/ToolChain.cpp | 6 + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 50 ++-- clang/lib/Driver/ToolChains/Arch/LoongArch.h | 6 + clang/lib/Driver/ToolChains/Arch/X86.cpp | 14 +- clang/lib/Driver/ToolChains/Arch/X86.h | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 31 +-- clang/lib/Driver/ToolChains/CommonArgs.cpp | 11 +- clang/lib/Driver/ToolChains/Gnu.cpp | 22 +- clang/lib/Driver/ToolChains/Hexagon.cpp | 5 + clang/lib/Driver/ToolChains/Solaris.cpp | 41 ++- clang/lib/Format/UnwrappedLineParser.cpp | 5 +- clang/lib/Headers/__clang_cuda_math.h | 2 +- .../Headers/__clang_hip_libdevice_declares.h | 2 +- clang/lib/Headers/cpuid.h | 10 - clang/lib/Interpreter/IncrementalExecutor.cpp | 19 +- clang/lib/Lex/LiteralSupport.cpp | 41 ++- clang/lib/Parse/ParseDeclCXX.cpp | 19 +- clang/lib/Parse/ParseTentative.cpp | 1 + clang/lib/Sema/SemaAvailability.cpp | 12 + clang/lib/Sema/SemaCast.cpp | 8 + clang/lib/Sema/SemaDecl.cpp | 3 +- clang/lib/Sema/SemaExpr.cpp | 95 +++---- clang/lib/Sema/SemaLookup.cpp | 68 +++-- clang/lib/Sema/TreeTransform.h | 4 + clang/lib/Serialization/ASTReaderDecl.cpp | 66 +++-- clang/lib/Serialization/ASTWriterDecl.cpp | 4 +- compiler-rt/lib/asan/asan_interceptors.cpp | 56 ++-- compiler-rt/lib/asan/asan_interceptors.h | 2 - compiler-rt/lib/asan/asan_win_dll_thunk.cpp | 2 + compiler-rt/lib/builtins/clear_cache.c | 2 +- compiler-rt/lib/builtins/cpu_model.c | 5 +- compiler-rt/lib/interception/interception.h | 2 +- compiler-rt/lib/msan/msan_interceptors.cpp | 37 +++ compiler-rt/lib/profile/InstrProfilingFile.c | 10 +- 
.../sanitizer_common_interceptors.inc | 73 +++-- .../sanitizer_stacktrace_sparc.cpp | 6 - .../sanitizer_unwind_linux_libcdep.cpp | 6 - .../symbolizer/scripts/global_symbols.txt | 7 + libcxx/include/__algorithm/pstl_sort.h | 1 + libcxx/include/__format/format_functions.h | 3 + .../locale_base_api/locale_guard.h | 1 + libcxx/include/__mdspan/layout_left.h | 2 +- libcxx/include/__std_clang_module | 226 ++++++++++++++++ .../__type_traits/is_nothrow_constructible.h | 3 +- libcxx/include/__type_traits/remove_cv.h | 2 +- libcxx/include/__type_traits/remove_cvref.h | 2 +- libcxx/include/module.modulemap.in | 64 ++--- libcxx/include/sstream | 50 ++-- libcxx/modules/std/atomic.cppm | 3 - libcxx/modules/std/execution.cppm | 2 +- libcxx/modules/std/filesystem.cppm | 4 +- libcxx/src/chrono.cpp | 2 +- libcxx/src/filesystem/filesystem_clock.cpp | 2 +- libunwind/src/Unwind-EHABI.cpp | 7 +- lld/ELF/Arch/LoongArch.cpp | 7 + lld/ELF/Arch/PPC.cpp | 12 +- lld/ELF/Arch/PPC64.cpp | 86 ++++-- lld/ELF/Target.h | 1 + lld/docs/ReleaseNotes.rst | 5 + .../GNUstepObjCRuntime/GNUstepObjCRuntime.cpp | 42 ++- .../Utility/RegisterContextPOSIX_arm64.cpp | 4 + .../Utility/RegisterContextPOSIX_arm64.h | 1 + .../Process/Utility/RegisterInfoPOSIX_arm64.h | 1 + .../RegisterContextPOSIXCore_arm64.cpp | 14 + .../elf-core/RegisterContextPOSIXCore_arm64.h | 1 + .../Process/elf-core/RegisterUtilities.h | 4 + llvm/include/llvm/ADT/FunctionExtras.h | 12 +- llvm/include/llvm/ADT/SmallVector.h | 4 +- llvm/include/llvm/Analysis/RegionInfoImpl.h | 4 +- llvm/include/llvm/Analysis/ValueTracking.h | 4 - .../include/llvm/CodeGen/CodeGenPassBuilder.h | 2 +- llvm/include/llvm/CodeGen/LowLevelType.h | 7 +- .../llvm/CodeGen/PreISelIntrinsicLowering.h | 4 + llvm/include/llvm/CodeGen/TargetInstrInfo.h | 17 -- llvm/include/llvm/Object/Wasm.h | 10 +- llvm/include/llvm/ObjectYAML/WasmYAML.h | 1 + llvm/include/llvm/Option/ArgList.h | 1 + llvm/include/llvm/Support/type_traits.h | 38 --- .../llvm/TargetParser/LoongArchTargetParser.h | 8 +- .../AggressiveInstCombine.h | 2 +- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 4 - llvm/lib/Analysis/ValueTracking.cpp | 7 - llvm/lib/CodeGen/CalcSpillWeights.cpp | 15 +- .../lib/CodeGen/ComplexDeinterleavingPass.cpp | 18 +- llvm/lib/CodeGen/InlineSpiller.cpp | 34 ++- llvm/lib/CodeGen/LiveRangeEdit.cpp | 3 +- llvm/lib/CodeGen/LiveRangeShrink.cpp | 4 +- llvm/lib/CodeGen/MachineLICM.cpp | 4 + llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 54 ++-- llvm/lib/CodeGen/RegAllocGreedy.cpp | 21 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 + .../SelectionDAG/SelectionDAGBuilder.cpp | 99 ++++--- llvm/lib/CodeGen/SplitKit.cpp | 17 +- llvm/lib/CodeGen/SplitKit.h | 7 +- llvm/lib/CodeGen/TargetInstrInfo.cpp | 7 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 17 -- llvm/lib/LTO/LTO.cpp | 7 +- llvm/lib/ObjCopy/wasm/WasmObject.h | 1 + llvm/lib/ObjCopy/wasm/WasmReader.cpp | 4 +- llvm/lib/ObjCopy/wasm/WasmWriter.cpp | 13 +- llvm/lib/Object/SymbolSize.cpp | 17 +- llvm/lib/Object/WasmObjectFile.cpp | 4 + llvm/lib/ObjectYAML/WasmEmitter.cpp | 12 +- llvm/lib/ObjectYAML/WasmYAML.cpp | 1 + llvm/lib/Option/ArgList.cpp | 7 + llvm/lib/TableGen/TGParser.cpp | 9 +- llvm/lib/Target/AArch64/AArch64.td | 6 +- .../Target/AArch64/AArch64FrameLowering.cpp | 13 +- .../Target/AArch64/AArch64ISelLowering.cpp | 19 +- .../lib/Target/AArch64/AArch64InstrFormats.td | 9 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 11 +- 
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 20 +- .../AArch64/AArch64LoadStoreOptimizer.cpp | 8 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 49 ++-- llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 +- .../AArch64/GISel/AArch64CallLowering.cpp | 5 + llvm/lib/Target/AArch64/SVEInstrFormats.td | 7 + llvm/lib/Target/AMDGPU/AMDGPU.h | 4 - llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 37 ++- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 47 +++- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 +- .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 16 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 - llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 16 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 7 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 45 +-- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 19 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 7 - llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 136 ++-------- llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp | 141 ---------- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 69 ++--- .../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 39 ++- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 23 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 17 +- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 2 + .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 6 +- .../lib/Target/BPF/BPFMISimplifyPatchable.cpp | 26 +- llvm/lib/Target/BPF/BTFDebug.cpp | 2 + .../Target/PowerPC/AsmParser/PPCAsmParser.cpp | 50 +++- .../PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 14 +- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 37 +-- llvm/lib/Target/PowerPC/PPCInstrFormats.td | 6 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 9 + llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 4 - llvm/lib/Target/PowerPC/PPCScheduleP9.td | 2 +- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 15 +- .../RISCV/RISCVExpandAtomicPseudoInsts.cpp | 9 + llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 39 +-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 13 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 26 +- .../Target/RISCV/RISCVPushPopOptimizer.cpp | 3 +- llvm/lib/Target/Sparc/SparcInstrInfo.td | 16 ++ .../SystemZ/SystemZTargetTransformInfo.cpp | 5 + llvm/lib/Target/X86/X86.td | 7 + llvm/lib/Target/X86/X86ISelLowering.cpp | 96 ++++--- llvm/lib/Target/X86/X86ISelLowering.h | 2 - llvm/lib/Target/X86/X86InstrAVX512.td | 10 + llvm/lib/Target/X86/X86InstrSSE.td | 5 + .../lib/Target/X86/X86TargetTransformInfo.cpp | 14 +- llvm/lib/Target/X86/X86TargetTransformInfo.h | 1 + llvm/lib/TargetParser/Host.cpp | 10 +- .../TargetParser/LoongArchTargetParser.cpp | 17 +- .../AggressiveInstCombine.cpp | 217 ++++----------- llvm/lib/Transforms/Coroutines/CoroElide.cpp | 83 ++++-- .../InstCombine/InstructionCombining.cpp | 2 +- .../ControlHeightReduction.cpp | 14 + .../Instrumentation/GCOVProfiling.cpp | 4 +- .../Scalar/ConstraintElimination.cpp | 2 +- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 256 +----------------- .../Scalar/TailRecursionElimination.cpp | 6 + .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 14 +- llvm/tools/llvm-readobj/ELFDumper.cpp | 2 +- 221 files changed, 2488 insertions(+), 2070 deletions(-) create mode 100644 libcxx/include/__std_clang_module delete mode 100644 llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 1b99709ca90d..12137387b676 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1702,7 +1702,7 @@ class 
DeclContext { }; /// Number of non-inherited bits in FunctionDeclBitfields. - enum { NumFunctionDeclBits = 30 }; + enum { NumFunctionDeclBits = 31 }; /// Stores the bits used by CXXConstructorDecl. If modified /// NumCXXConstructorDeclBits and the accessor @@ -1714,12 +1714,12 @@ class DeclContext { /// For the bits in FunctionDeclBitfields. uint64_t : NumFunctionDeclBits; - /// 21 bits to fit in the remaining available space. + /// 20 bits to fit in the remaining available space. /// Note that this makes CXXConstructorDeclBitfields take /// exactly 64 bits and thus the width of NumCtorInitializers /// will need to be shrunk if some bit is added to NumDeclContextBitfields, /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields. - uint64_t NumCtorInitializers : 18; + uint64_t NumCtorInitializers : 17; uint64_t IsInheritingConstructor : 1; /// Whether this constructor has a trail-allocated explicit specifier. diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 11aec88c5335..d492b8681c5d 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -165,7 +165,6 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the ///< compile step. CODEGENOPT(LTOUnit, 1, 0) ///< Emit IR to support LTO unit features (CFI, whole ///< program vtable opt). -CODEGENOPT(FatLTO, 1, 0) ///< Set when -ffat-lto-objects is enabled. CODEGENOPT(EnableSplitLTOUnit, 1, 0) ///< Enable LTO unit splitting to support /// CFI and traditional whole program /// devirtualization that require whole diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index 566cdc340605..0794ed7ba683 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -70,6 +70,8 @@ def note_consteval_address_accessible : Note< "is not a constant expression">; def note_constexpr_uninitialized : Note< "subobject %0 is not initialized">; +def note_constexpr_uninitialized_base : Note< + "constructor of base class %0 is not called">; def note_constexpr_static_local : Note< "control flows through the definition of a %select{static|thread_local}0 variable">; def note_constexpr_subobject_declared_here : Note< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 7b4d415bf064..26bc88a980e4 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -20,6 +20,7 @@ def DeprecatedStaticAnalyzerFlag : DiagGroup<"deprecated-static-analyzer-flag">; // Empty DiagGroups are recognized by clang but ignored. def ODR : DiagGroup<"odr">; def : DiagGroup<"abi">; +def : DiagGroup<"gnu-empty-initializer">; // Now a C extension, not GNU. 
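The DeclBase.h hunk above widens NumFunctionDeclBits and shrinks NumCtorInitializers by the same amount so that CXXConstructorDeclBitfields keeps taking exactly 64 bits. A minimal sketch of the unnamed-bit-field overlay pattern involved, with made-up widths rather than clang's real layout:

    #include <cstdint>

    // Hypothetical widths for illustration only.
    enum { NumBaseBits = 13, NumDerivedBits = 31 };

    struct BaseBits {
      uint64_t Flags : NumBaseBits;
    };

    struct DerivedBits {
      uint64_t : NumBaseBits;      // skip the bits owned by BaseBits
      uint64_t Extra : NumDerivedBits;
    };

    struct MostDerivedBits {
      uint64_t : NumBaseBits;      // skip BaseBits
      uint64_t : NumDerivedBits;   // skip DerivedBits
      uint64_t Count : 19;         // 13 + 31 + 19 + 1 == 64, so widening any
      uint64_t Flag : 1;           // earlier field forces Count to shrink
    };

    static_assert(sizeof(MostDerivedBits) == sizeof(uint64_t),
                  "all three views overlay a single 64-bit word");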
def AbsoluteValue : DiagGroup<"absolute-value">; def MisspelledAssumption : DiagGroup<"misspelled-assumption">; def UnknownAssumption : DiagGroup<"unknown-assumption">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 0eb270aeea0e..6ad691975bd5 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -285,6 +285,10 @@ def ext_ms_reserved_user_defined_literal : ExtWarn< def err_unsupported_string_concat : Error< "unsupported non-standard concatenation of string literals">; +def warn_unevaluated_string_prefix : Warning< + "encoding prefix '%0' on an unevaluated string literal has no effect" + "%select{| and is incompatible with c++2c}1">, + InGroup>; def err_unevaluated_string_prefix : Error< "an unevaluated string literal cannot have an encoding prefix">; def err_unevaluated_string_udl : Error< diff --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h index db53010645ae..4659e45c7883 100644 --- a/clang/include/clang/Basic/Sanitizers.h +++ b/clang/include/clang/Basic/Sanitizers.h @@ -23,7 +23,11 @@ namespace llvm { class hash_code; +class Triple; +namespace opt { +class ArgList; } +} // namespace llvm namespace clang { diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 41ef47eb565b..61be52149341 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1414,7 +1414,9 @@ class TargetInfo : public TransferrableTargetInfo, /// Identify whether this target supports IFuncs. bool supportsIFunc() const { - return getTriple().isOSBinFormatELF() && !getTriple().isOSFuchsia(); + return getTriple().isOSBinFormatELF() && + ((getTriple().isOSLinux() && !getTriple().isMusl()) || + getTriple().isOSFreeBSD()); } // Validate the contents of the __builtin_cpu_supports(const char*) diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index 7e5889812aec..6adc60031341 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -112,7 +112,7 @@ multiclass RVVIntBinBuiltinSet multiclass RVVSlideOneBuiltinSet : RVVOutOp1BuiltinSet; + ["vx", "Uv", "UvUvUe"]]>; multiclass RVVSignedShiftBuiltinSet : RVVOutOp1BuiltinSet paramInfos, - CanQualType resultType, - ArrayRef argTypes, - RequiredArgs required); + static CGFunctionInfo * + create(unsigned llvmCC, bool instanceMethod, bool chainCall, + bool delegateCall, const FunctionType::ExtInfo &extInfo, + ArrayRef paramInfos, CanQualType resultType, + ArrayRef argTypes, RequiredArgs required); void operator delete(void *p) { ::operator delete(p); } // Friending class TrailingObjects is apparently not good enough for MSVC, @@ -663,6 +664,8 @@ class CGFunctionInfo final bool isChainCall() const { return ChainCall; } + bool isDelegateCall() const { return DelegateCall; } + bool isCmseNSCall() const { return CmseNSCall; } bool isNoReturn() const { return NoReturn; } @@ -749,6 +752,7 @@ class CGFunctionInfo final ID.AddInteger(getASTCallingConvention()); ID.AddBoolean(InstanceMethod); ID.AddBoolean(ChainCall); + ID.AddBoolean(DelegateCall); ID.AddBoolean(NoReturn); ID.AddBoolean(ReturnsRetained); ID.AddBoolean(NoCallerSavedRegs); @@ -766,17 +770,16 @@ class CGFunctionInfo final for (const auto &I : arguments()) I.type.Profile(ID); } - static void Profile(llvm::FoldingSetNodeID &ID, - bool InstanceMethod, - bool ChainCall, + static void 
Profile(llvm::FoldingSetNodeID &ID, bool InstanceMethod, + bool ChainCall, bool IsDelegateCall, const FunctionType::ExtInfo &info, ArrayRef paramInfos, - RequiredArgs required, - CanQualType resultType, + RequiredArgs required, CanQualType resultType, ArrayRef argTypes) { ID.AddInteger(info.getCC()); ID.AddBoolean(InstanceMethod); ID.AddBoolean(ChainCall); + ID.AddBoolean(IsDelegateCall); ID.AddBoolean(info.getNoReturn()); ID.AddBoolean(info.getProducesResult()); ID.AddBoolean(info.getNoCallerSavedRegs()); diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 229f6141c750..e04f67bdb1fa 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2375,11 +2375,6 @@ def fthin_link_bitcode_EQ : Joined<["-"], "fthin-link-bitcode=">, Flags<[CoreOption, CC1Option]>, Group, HelpText<"Write minimized bitcode to for the ThinLTO thin link only">, MarshallingInfoString>; -defm fat_lto_objects : BoolFOption<"fat-lto-objects", - CodeGenOpts<"FatLTO">, DefaultFalse, - PosFlag, - NegFlag, - BothFlags<[CC1Option], " fat LTO object support">>; def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">, Group, Flags<[NoXarchOption, CC1Option, CoreOption]>, HelpText<"Set the maximum number of entries to print in a macro expansion backtrace (0 = no limit)">, @@ -5097,6 +5092,10 @@ def mretpoline_external_thunk : Flag<["-"], "mretpoline-external-thunk">, Group< def mno_retpoline_external_thunk : Flag<["-"], "mno-retpoline-external-thunk">, Group; def mvzeroupper : Flag<["-"], "mvzeroupper">, Group; def mno_vzeroupper : Flag<["-"], "mno-vzeroupper">, Group; +def mno_gather : Flag<["-"], "mno-gather">, Group, + HelpText<"Disable generation of gather instructions in auto-vectorization(x86 only)">; +def mno_scatter : Flag<["-"], "mno-scatter">, Group, + HelpText<"Disable generation of scatter instructions in auto-vectorization(x86 only)">; // These are legacy user-facing driver-level option spellings. They are always // aliases for options that are spelled using the more common Unix / GNU flag @@ -5162,6 +5161,7 @@ defm caller_saves : BooleanFFlag<"caller-saves">, Group, Group; defm branch_count_reg : BooleanFFlag<"branch-count-reg">, Group; defm default_inline : BooleanFFlag<"default-inline">, Group; +defm fat_lto_objects : BooleanFFlag<"fat-lto-objects">, Group; defm float_store : BooleanFFlag<"float-store">, Group; defm friend_injection : BooleanFFlag<"friend-injection">, Group; defm function_attribute_list : BooleanFFlag<"function-attribute-list">, Group; @@ -7152,6 +7152,10 @@ def _SLASH_QIntel_jcc_erratum : CLFlag<"QIntel-jcc-erratum">, Alias; def _SLASH_arm64EC : CLFlag<"arm64EC">, HelpText<"Set build target to arm64ec">; +def : CLFlag<"Qgather-">, Alias, + HelpText<"Disable generation of gather instructions in auto-vectorization(x86 only)">; +def : CLFlag<"Qscatter-">, Alias, + HelpText<"Disable generation of scatter instructions in auto-vectorization(x86 only)">; // Non-aliases: diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index e3fcbd9322b0..2e74507f7126 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -561,7 +561,7 @@ class ToolChain { // Return the DWARF version to emit, in the absence of arguments // to the contrary. 
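The DiagnosticLexKinds.td hunk above adds warn_unevaluated_string_prefix next to the existing err_unevaluated_string_prefix. A minimal illustration, assuming the static_assert message is one of the contexts Clang treats as an unevaluated string:

    // The message is never evaluated as an expression, so the encoding prefix
    // contributes nothing; depending on the language mode this draws the new
    // warning or the pre-existing error quoted above.
    static_assert(sizeof(char) == 1, L"the L prefix adds nothing here");

    static_assert(sizeof(char) == 1, "unprefixed message, no diagnostic");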
- virtual unsigned GetDefaultDwarfVersion() const { return 5; } + virtual unsigned GetDefaultDwarfVersion() const; // Some toolchains may have different restrictions on the DWARF version and // may need to adjust it. E.g. NVPTX may need to enforce DWARF2 even when host diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 3418a37b3077..cfd1c0f977c0 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12694,8 +12694,6 @@ class Sema final { QualType CheckBitwiseOperands( // C99 6.5.[10...12] ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc); - void diagnoseLogicalInsteadOfBitwise(Expr *Op1, Expr *Op2, SourceLocation Loc, - BinaryOperatorKind Opc); QualType CheckLogicalOperands( // C99 6.5.[13,14] ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 7acacd7bf4f5..76000156fece 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9612,9 +9612,8 @@ bool ASTContext::areLaxCompatibleRVVTypes(QualType FirstType, const LangOptions::LaxVectorConversionKind LVCKind = getLangOpts().getLaxVectorConversions(); - // If __riscv_v_fixed_vlen != N do not allow GNU vector lax conversion. - if (VecTy->getVectorKind() == VectorType::GenericVector && - getTypeSize(SecondType) != getRVVTypeSize(*this, BT)) + // If __riscv_v_fixed_vlen != N do not allow vector lax conversion. + if (getTypeSize(SecondType) != getRVVTypeSize(*this, BT)) return false; // If -flax-vector-conversions=all is specified, the types are diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index f1c842e26199..f1bad0c7f7f2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -2418,9 +2418,16 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, if (const CXXRecordDecl *CD = dyn_cast(RD)) { unsigned BaseIndex = 0; for (const CXXBaseSpecifier &BS : CD->bases()) { - if (!CheckEvaluationResult(CERK, Info, DiagLoc, BS.getType(), - Value.getStructBase(BaseIndex), Kind, - /*SubobjectDecl=*/nullptr, CheckedTemps)) + const APValue &BaseValue = Value.getStructBase(BaseIndex); + if (!BaseValue.hasValue()) { + SourceLocation TypeBeginLoc = BS.getBaseTypeLoc(); + Info.FFDiag(TypeBeginLoc, diag::note_constexpr_uninitialized_base) + << BS.getType() << SourceRange(TypeBeginLoc, BS.getEndLoc()); + return false; + } + if (!CheckEvaluationResult(CERK, Info, DiagLoc, BS.getType(), BaseValue, + Kind, /*SubobjectDecl=*/nullptr, + CheckedTemps)) return false; ++BaseIndex; } @@ -15218,14 +15225,6 @@ static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result, return true; } - // FIXME: Evaluating values of large array and record types can cause - // performance problems. Only do so in C++11 for now. - if (Exp->isPRValue() && - (Exp->getType()->isArrayType() || Exp->getType()->isRecordType()) && - !Ctx.getLangOpts().CPlusPlus11) { - IsConst = false; - return true; - } return false; } @@ -15467,12 +15466,6 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, return Name; }); - // FIXME: Evaluating initializers for large array and record types can cause - // performance problems. Only do so in C++11 for now. 
- if (isPRValue() && (getType()->isArrayType() || getType()->isRecordType()) && - !Ctx.getLangOpts().CPlusPlus11) - return false; - Expr::EvalStatus EStatus; EStatus.Diag = &Notes; diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index f08e5e732b03..4448a2ae10a1 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -199,18 +199,14 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__loongarch_frlen", "0"); // Define __loongarch_arch. - StringRef Arch = llvm::LoongArch::getArch(); - if (Arch.empty()) - Arch = llvm::LoongArch::getDefaultArch(GRLen == 64); - if (!Arch.empty()) - Builder.defineMacro("__loongarch_arch", Arch); + StringRef ArchName = getCPU(); + Builder.defineMacro("__loongarch_arch", Twine('"') + ArchName + Twine('"')); // Define __loongarch_tune. - StringRef TuneCPU = llvm::LoongArch::getTuneCPU(); + StringRef TuneCPU = getTargetOpts().TuneCPU; if (TuneCPU.empty()) - TuneCPU = Arch; - if (!TuneCPU.empty()) - Builder.defineMacro("__loongarch_tune", TuneCPU); + TuneCPU = ArchName; + Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") @@ -283,11 +279,11 @@ bool LoongArchTargetInfo::handleTargetFeatures( return true; } -bool LoongArchTargetInfo::isValidTuneCPUName(StringRef Name) const { - return llvm::LoongArch::isValidTuneCPUName(Name); +bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const { + return llvm::LoongArch::isValidCPUName(Name); } -void LoongArchTargetInfo::fillValidTuneCPUList( +void LoongArchTargetInfo::fillValidCPUList( SmallVectorImpl &Values) const { - llvm::LoongArch::fillValidTuneCPUList(Values); + llvm::LoongArch::fillValidCPUList(Values); } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 60d545566b30..34143f462a24 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -24,6 +24,7 @@ namespace targets { class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { protected: std::string ABI; + std::string CPU; bool HasFeatureD; bool HasFeatureF; @@ -40,6 +41,15 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { WIntType = UnsignedInt; } + bool setCPU(const std::string &Name) override { + if (!isValidCPUName(Name)) + return false; + CPU = Name; + return true; + } + + StringRef getCPU() const { return CPU; } + StringRef getABI() const override { return ABI; } void getTargetDefines(const LangOptions &Opts, @@ -81,8 +91,8 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool hasFeature(StringRef Feature) const override; - bool isValidTuneCPUName(StringRef Name) const override; - void fillValidTuneCPUList(SmallVectorImpl &Values) const override; + bool isValidCPUName(StringRef Name) const override; + void fillValidCPUList(SmallVectorImpl &Values) const override; }; class LLVM_LIBRARY_VISIBILITY LoongArch32TargetInfo diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 94c894dfec0b..d55ab76395c8 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -196,8 +196,8 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, if (ISAInfo->hasExtension("zve32x")) { Builder.defineMacro("__riscv_vector"); - // Currently we support the v0.11 RISC-V V intrinsics. 
- Builder.defineMacro("__riscv_v_intrinsic", Twine(getVersionValue(0, 11))); + // Currently we support the v0.12 RISC-V V intrinsics. + Builder.defineMacro("__riscv_v_intrinsic", Twine(getVersionValue(0, 12))); } auto VScale = getVScaleRange(Opts); diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp index 7c30cecfdb9b..2b20d5a13346 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.cpp +++ b/clang/lib/CodeGen/ABIInfoImpl.cpp @@ -246,7 +246,7 @@ Address CodeGen::emitMergePHI(CodeGenFunction &CGF, Address Addr1, } bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD, - bool AllowArrays) { + bool AllowArrays, bool AsIfNoUniqueAddr) { if (FD->isUnnamedBitfield()) return true; @@ -280,13 +280,14 @@ bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD, // not arrays of records, so we must also check whether we stripped off an // array type above. if (isa(RT->getDecl()) && - (WasArray || !FD->hasAttr())) + (WasArray || (!AsIfNoUniqueAddr && !FD->hasAttr()))) return false; - return isEmptyRecord(Context, FT, AllowArrays); + return isEmptyRecord(Context, FT, AllowArrays, AsIfNoUniqueAddr); } -bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { +bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, + bool AsIfNoUniqueAddr) { const RecordType *RT = T->getAs(); if (!RT) return false; @@ -297,11 +298,11 @@ bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) - if (!isEmptyRecord(Context, I.getType(), true)) + if (!isEmptyRecord(Context, I.getType(), true, AsIfNoUniqueAddr)) return false; for (const auto *I : RD->fields()) - if (!isEmptyField(Context, I, AllowArrays)) + if (!isEmptyField(Context, I, AllowArrays, AsIfNoUniqueAddr)) return false; return true; } diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h index 5f0cc289af68..afde08ba100c 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.h +++ b/clang/lib/CodeGen/ABIInfoImpl.h @@ -122,13 +122,19 @@ Address emitMergePHI(CodeGenFunction &CGF, Address Addr1, llvm::BasicBlock *Block2, const llvm::Twine &Name = ""); /// isEmptyField - Return true iff a the field is "empty", that is it -/// is an unnamed bit-field or an (array of) empty record(s). -bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays); +/// is an unnamed bit-field or an (array of) empty record(s). If +/// AsIfNoUniqueAddr is true, then C++ record fields are considered empty if +/// the [[no_unique_address]] attribute would have made them empty. +bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays, + bool AsIfNoUniqueAddr = false); /// isEmptyRecord - Return true iff a structure contains only empty /// fields. Note that a structure with a flexible array member is not -/// considered empty. -bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); +/// considered empty. If AsIfNoUniqueAddr is true, then C++ record fields are +/// considered empty if the [[no_unique_address]] attribute would have made +/// them empty. +bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, + bool AsIfNoUniqueAddr = false); /// isSingleElementStruct - Determine if a structure is a "single /// element struct", i.e. 
it has exactly one non-empty field or diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index cda03d69522d..483f3e787a78 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -55,7 +55,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/SubtargetFeature.h" #include "llvm/TargetParser/Triple.h" -#include "llvm/Transforms/IPO/EmbedBitcodePass.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" @@ -1016,12 +1015,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( }); } - bool IsThinOrUnifiedLTO = IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO); - if (CodeGenOpts.FatLTO) { - MPM = PB.buildFatLTODefaultPipeline(Level, IsThinOrUnifiedLTO, - IsThinOrUnifiedLTO || - shouldEmitRegularLTOSummary()); - } else if (IsThinOrUnifiedLTO) { + if (IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO)) { MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); } else if (IsLTO) { MPM = PB.buildLTOPreLinkDefaultPipeline(Level); @@ -1077,21 +1071,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline( EmitLTOSummary)); } } - if (CodeGenOpts.FatLTO) { - // Set module flags, like EnableSplitLTOUnit and UnifiedLTO, since FatLTO - // uses a different action than Backend_EmitBC or Backend_EmitLL. - bool IsThinOrUnifiedLTO = - CodeGenOpts.PrepareForThinLTO || - (CodeGenOpts.PrepareForLTO && CodeGenOpts.UnifiedLTO); - if (!TheModule->getModuleFlag("ThinLTO")) - TheModule->addModuleFlag(Module::Error, "ThinLTO", - uint32_t(IsThinOrUnifiedLTO)); - if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - uint32_t(CodeGenOpts.EnableSplitLTOUnit)); - if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO")) - TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); - } // Now that we have all of the passes ready, run them. { diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp index 7b77dd7875bc..4df6f6505ef6 100644 --- a/clang/lib/CodeGen/CGCXXABI.cpp +++ b/clang/lib/CodeGen/CGCXXABI.cpp @@ -312,8 +312,7 @@ void CGCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV, llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage( GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { // Delegate back to CGM by default. - return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*IsConstantVariable=*/false); + return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage); } bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) { diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index bd272e016e92..0d1e9ad439b7 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -13,6 +13,7 @@ #include "CGCall.h" #include "ABIInfo.h" +#include "ABIInfoImpl.h" #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" @@ -112,8 +113,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual FTNP) { // When translating an unprototyped function type, always use a // variadic type. 
return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), - /*instanceMethod=*/false, - /*chainCall=*/false, std::nullopt, + FnInfoOpts::None, std::nullopt, FTNP->getExtInfo(), {}, RequiredArgs(0)); } @@ -189,10 +189,10 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, appendParameterTypes(CGT, prefix, paramInfos, FTP); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); - return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod, - /*chainCall=*/false, prefix, - FTP->getExtInfo(), paramInfos, - Required); + FnInfoOpts opts = + instanceMethod ? FnInfoOpts::IsInstanceMethod : FnInfoOpts::None; + return CGT.arrangeLLVMFunctionInfo(resultType, opts, prefix, + FTP->getExtInfo(), paramInfos, Required); } /// Arrange the argument and result information for a value of the @@ -271,7 +271,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, argTypes.push_back(DeriveThisType(RD, MD)); return ::arrangeLLVMFunctionInfo( - *this, true, argTypes, + *this, /*instanceMethod=*/true, argTypes, FTP->getCanonicalTypeUnqualified().getAs()); } @@ -363,9 +363,8 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { : TheCXXABI.hasMostDerivedReturn(GD) ? CGM.getContext().VoidPtrTy : Context.VoidTy; - return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true, - /*chainCall=*/false, argTypes, extInfo, - paramInfos, required); + return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::IsInstanceMethod, + argTypes, extInfo, paramInfos, required); } static SmallVector @@ -439,9 +438,9 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs, ArgTypes.size()); } - return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true, - /*chainCall=*/false, ArgTypes, Info, - ParamInfos, Required); + + return arrangeLLVMFunctionInfo(ResultType, FnInfoOpts::IsInstanceMethod, + ArgTypes, Info, ParamInfos, Required); } /// Arrange the argument and result information for the declaration or @@ -460,10 +459,9 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { // When declaring a function without a prototype, always use a // non-variadic type. if (CanQual noProto = FTy.getAs()) { - return arrangeLLVMFunctionInfo( - noProto->getReturnType(), /*instanceMethod=*/false, - /*chainCall=*/false, std::nullopt, noProto->getExtInfo(), {}, - RequiredArgs::All); + return arrangeLLVMFunctionInfo(noProto->getReturnType(), FnInfoOpts::None, + std::nullopt, noProto->getExtInfo(), {}, + RequiredArgs::All); } return arrangeFreeFunctionType(FTy.castAs()); @@ -512,9 +510,9 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, RequiredArgs required = (MD->isVariadic() ? 
RequiredArgs(argTys.size()) : RequiredArgs::All); - return arrangeLLVMFunctionInfo( - GetReturnType(MD->getReturnType()), /*instanceMethod=*/false, - /*chainCall=*/false, argTys, einfo, extParamInfos, required); + return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()), + FnInfoOpts::None, argTys, einfo, extParamInfos, + required); } const CGFunctionInfo & @@ -523,9 +521,8 @@ CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType, auto argTypes = getArgTypesForCall(Context, args); FunctionType::ExtInfo einfo; - return arrangeLLVMFunctionInfo( - GetReturnType(returnType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, einfo, {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(GetReturnType(returnType), FnInfoOpts::None, + argTypes, einfo, {}, RequiredArgs::All); } const CGFunctionInfo & @@ -550,8 +547,7 @@ CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { assert(MD->isVirtual() && "only methods have thunks"); CanQual FTP = GetFormalType(MD); CanQualType ArgTys[] = {DeriveThisType(MD->getParent(), MD)}; - return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, - /*chainCall=*/false, ArgTys, + return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::None, ArgTys, FTP->getExtInfo(), {}, RequiredArgs(1)); } @@ -570,9 +566,8 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, ArgTys.push_back(Context.IntTy); CallingConv CC = Context.getDefaultCallingConvention( /*IsVariadic=*/false, /*IsCXXMethod=*/true); - return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true, - /*chainCall=*/false, ArgTys, - FunctionType::ExtInfo(CC), {}, + return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::IsInstanceMethod, + ArgTys, FunctionType::ExtInfo(CC), {}, RequiredArgs::All); } @@ -616,10 +611,10 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, SmallVector argTypes; for (const auto &arg : args) argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty)); + FnInfoOpts opts = chainCall ? 
FnInfoOpts::IsChainCall : FnInfoOpts::None; return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()), - /*instanceMethod=*/false, chainCall, - argTypes, fnType->getExtInfo(), paramInfos, - required); + opts, argTypes, fnType->getExtInfo(), + paramInfos, required); } /// Figure out the rules for calling a function with the given formal @@ -650,8 +645,8 @@ CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, auto argTypes = getArgTypesForDeclaration(Context, params); return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()), - /*instanceMethod*/ false, /*chainCall*/ false, - argTypes, proto->getExtInfo(), paramInfos, + FnInfoOpts::None, argTypes, + proto->getExtInfo(), paramInfos, RequiredArgs::forPrototypePlus(proto, 1)); } @@ -662,10 +657,9 @@ CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType, SmallVector argTypes; for (const auto &Arg : args) argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); - return arrangeLLVMFunctionInfo( - GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, FunctionType::ExtInfo(), - /*paramInfos=*/ {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, + argTypes, FunctionType::ExtInfo(), + /*paramInfos=*/{}, RequiredArgs::All); } const CGFunctionInfo & @@ -673,17 +667,17 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args) { auto argTypes = getArgTypesForDeclaration(Context, args); - return arrangeLLVMFunctionInfo( - GetReturnType(resultType), /*instanceMethod=*/false, /*chainCall=*/false, - argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, + argTypes, FunctionType::ExtInfo(), {}, + RequiredArgs::All); } const CGFunctionInfo & CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, ArrayRef argTypes) { - return arrangeLLVMFunctionInfo( - resultType, /*instanceMethod=*/false, /*chainCall=*/false, - argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::None, argTypes, + FunctionType::ExtInfo(), {}, + RequiredArgs::All); } /// Arrange a call to a C++ method, passing the given arguments. 
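The surrounding CGCall.cpp changes replace the separate instanceMethod/chainCall booleans with a single FnInfoOpts bit mask. A self-contained sketch of that flags-enum pattern in general form, not clang's exact definitions:

    #include <cstdint>
    #include <type_traits>

    enum class CallOpts : uint8_t {
      None             = 0,
      IsInstanceMethod = 1 << 0,
      IsChainCall      = 1 << 1,
      IsDelegateCall   = 1 << 2,
    };

    constexpr CallOpts operator|(CallOpts A, CallOpts B) {
      using U = std::underlying_type_t<CallOpts>;
      return static_cast<CallOpts>(static_cast<U>(A) | static_cast<U>(B));
    }

    constexpr bool has(CallOpts Value, CallOpts Flag) {
      using U = std::underlying_type_t<CallOpts>;
      return (static_cast<U>(Value) & static_cast<U>(Flag)) != 0;
    }

    // One options parameter replaces a row of easily swapped booleans.
    void arrangeCall(CallOpts Opts) {
      bool IsInstance = has(Opts, CallOpts::IsInstanceMethod);
      bool IsChain    = has(Opts, CallOpts::IsChainCall);
      (void)IsInstance;
      (void)IsChain;
    }

    // Usage: arrangeCall(CallOpts::IsInstanceMethod | CallOpts::IsDelegateCall);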
@@ -706,15 +700,15 @@ CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, auto argTypes = getArgTypesForCall(Context, args); FunctionType::ExtInfo info = proto->getExtInfo(); - return arrangeLLVMFunctionInfo( - GetReturnType(proto->getReturnType()), /*instanceMethod=*/true, - /*chainCall=*/false, argTypes, info, paramInfos, required); + return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()), + FnInfoOpts::IsInstanceMethod, argTypes, info, + paramInfos, required); } const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { - return arrangeLLVMFunctionInfo( - getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, - std::nullopt, FunctionType::ExtInfo(), {}, RequiredArgs::All); + return arrangeLLVMFunctionInfo(getContext().VoidTy, FnInfoOpts::None, + std::nullopt, FunctionType::ExtInfo(), {}, + RequiredArgs::All); } const CGFunctionInfo & @@ -734,12 +728,15 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature, auto argTypes = getArgTypesForCall(Context, args); assert(signature.getRequiredArgs().allowsOptionalArgs()); - return arrangeLLVMFunctionInfo(signature.getReturnType(), - signature.isInstanceMethod(), - signature.isChainCall(), - argTypes, - signature.getExtInfo(), - paramInfos, + FnInfoOpts opts = FnInfoOpts::None; + if (signature.isInstanceMethod()) + opts |= FnInfoOpts::IsInstanceMethod; + if (signature.isChainCall()) + opts |= FnInfoOpts::IsChainCall; + if (signature.isDelegateCall()) + opts |= FnInfoOpts::IsDelegateCall; + return arrangeLLVMFunctionInfo(signature.getReturnType(), opts, argTypes, + signature.getExtInfo(), paramInfos, signature.getRequiredArgs()); } @@ -752,21 +749,24 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI); /// Arrange the argument and result information for an abstract value /// of a given function type. This is the method which all of the /// above functions ultimately defer to. -const CGFunctionInfo & -CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, - bool instanceMethod, - bool chainCall, - ArrayRef argTypes, - FunctionType::ExtInfo info, - ArrayRef paramInfos, - RequiredArgs required) { +const CGFunctionInfo &CodeGenTypes::arrangeLLVMFunctionInfo( + CanQualType resultType, FnInfoOpts opts, ArrayRef argTypes, + FunctionType::ExtInfo info, + ArrayRef paramInfos, + RequiredArgs required) { assert(llvm::all_of(argTypes, [](CanQualType T) { return T.isCanonicalAsParam(); })); // Lookup or create unique function info. llvm::FoldingSetNodeID ID; - CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos, - required, resultType, argTypes); + bool isInstanceMethod = + (opts & FnInfoOpts::IsInstanceMethod) == FnInfoOpts::IsInstanceMethod; + bool isChainCall = + (opts & FnInfoOpts::IsChainCall) == FnInfoOpts::IsChainCall; + bool isDelegateCall = + (opts & FnInfoOpts::IsDelegateCall) == FnInfoOpts::IsDelegateCall; + CGFunctionInfo::Profile(ID, isInstanceMethod, isChainCall, isDelegateCall, + info, paramInfos, required, resultType, argTypes); void *insertPos = nullptr; CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos); @@ -776,8 +776,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); // Construct the function info. We co-allocate the ArgInfos. 
- FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info, - paramInfos, resultType, argTypes, required); + FI = CGFunctionInfo::create(CC, isInstanceMethod, isChainCall, isDelegateCall, + info, paramInfos, resultType, argTypes, required); FunctionInfos.InsertNode(FI, insertPos); bool inserted = FunctionsBeingProcessed.insert(FI).second; @@ -812,9 +812,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, return *FI; } -CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, - bool instanceMethod, - bool chainCall, +CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, bool instanceMethod, + bool chainCall, bool delegateCall, const FunctionType::ExtInfo &info, ArrayRef paramInfos, CanQualType resultType, @@ -834,6 +833,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ASTCallingConvention = info.getCC(); FI->InstanceMethod = instanceMethod; FI->ChainCall = chainCall; + FI->DelegateCall = delegateCall; FI->CmseNSCall = info.getCmseNSCall(); FI->NoReturn = info.getNoReturn(); FI->ReturnsRetained = info.getProducesResult(); @@ -3989,10 +3989,6 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, QualType type = param->getType(); - if (isInAllocaArgument(CGM.getCXXABI(), type)) { - CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter"); - } - // GetAddrOfLocalVar returns a pointer-to-pointer for references, // but the argument needs to be the original pointer. if (type->isReferenceType()) { @@ -5105,7 +5101,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, "indirect argument must be in alloca address space"); bool NeedCopy = false; - if (Addr.getAlignment() < Align && llvm::getOrEnforceKnownAlignment(V, Align.getAsAlign(), *TD) < Align.getAsAlign()) { @@ -5244,30 +5239,50 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, dyn_cast(ArgInfo.getCoerceToType()); if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) { llvm::Type *SrcTy = Src.getElementType(); - uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); - uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy); - - // If the source type is smaller than the destination type of the - // coerce-to logic, copy the source value into a temp alloca the size - // of the destination type to allow loading all of it. The bits past - // the source value are left undef. 
- if (SrcSize < DstSize) { - Address TempAlloca - = CreateTempAlloca(STy, Src.getAlignment(), - Src.getName() + ".coerce"); - Builder.CreateMemCpy(TempAlloca, Src, SrcSize); - Src = TempAlloca; + llvm::TypeSize SrcTypeSize = + CGM.getDataLayout().getTypeAllocSize(SrcTy); + llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy); + if (SrcTypeSize.isScalable()) { + assert(STy->containsHomogeneousScalableVectorTypes() && + "ABI only supports structure with homogeneous scalable vector " + "type"); + assert(SrcTypeSize == DstTypeSize && + "Only allow non-fractional movement of structure with " + "homogeneous scalable vector type"); + assert(NumIRArgs == STy->getNumElements()); + + llvm::Value *StoredStructValue = + Builder.CreateLoad(Src, Src.getName() + ".tuple"); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + llvm::Value *Extract = Builder.CreateExtractValue( + StoredStructValue, i, Src.getName() + ".extract" + Twine(i)); + IRCallArgs[FirstIRArg + i] = Extract; + } } else { - Src = Src.withElementType(STy); - } + uint64_t SrcSize = SrcTypeSize.getFixedValue(); + uint64_t DstSize = DstTypeSize.getFixedValue(); + + // If the source type is smaller than the destination type of the + // coerce-to logic, copy the source value into a temp alloca the size + // of the destination type to allow loading all of it. The bits past + // the source value are left undef. + if (SrcSize < DstSize) { + Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(), + Src.getName() + ".coerce"); + Builder.CreateMemCpy(TempAlloca, Src, SrcSize); + Src = TempAlloca; + } else { + Src = Src.withElementType(STy); + } - assert(NumIRArgs == STy->getNumElements()); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Address EltPtr = Builder.CreateStructGEP(Src, i); - llvm::Value *LI = Builder.CreateLoad(EltPtr); - if (ArgHasMaybeUndefAttr) - LI = Builder.CreateFreeze(LI); - IRCallArgs[FirstIRArg + i] = LI; + assert(NumIRArgs == STy->getNumElements()); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Address EltPtr = Builder.CreateStructGEP(Src, i); + llvm::Value *LI = Builder.CreateLoad(EltPtr); + if (ArgHasMaybeUndefAttr) + LI = Builder.CreateFreeze(LI); + IRCallArgs[FirstIRArg + i] = LI; + } } } else { // In the simple case, just pass the coerced loaded value. @@ -5472,6 +5487,30 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); } + // The await_suspend call performed by co_await is essentially asynchronous + // to the execution of the coroutine. Inlining it normally into an unsplit + // coroutine can cause miscompilation because the coroutine CFG misrepresents + // the true control flow of the program: things that happen in the + // await_suspend are not guaranteed to happen prior to the resumption of the + // coroutine, and things that happen after the resumption of the coroutine + // (including its exit and the potential deallocation of the coroutine frame) + // are not guaranteed to happen only after the end of await_suspend. + // + // The short-term solution to this problem is to mark the call as uninlinable. + // But we don't want to do this if the call is known to be trivial, which is + // very common. + // + // The long-term solution may introduce patterns like: + // + // call @llvm.coro.await_suspend(ptr %awaiter, ptr %handle, + // ptr @awaitSuspendFn) + // + // Then it is much easier to perform the safety analysis in the middle end. 
+ // If it is safe to inline the call to awaitSuspend, we can replace it in the + // CoroEarly pass. Otherwise we could replace it in the CoroSplit pass. + if (inSuspendBlock() && mayCoroHandleEscape()) + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); + // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); @@ -5765,9 +5804,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, DestIsVolatile = false; } - // If the value is offset in memory, apply the offset now. - Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI); - CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this); + // An empty record can overlap other data (if declared with + // no_unique_address); omit the store for such types - as there is no + // actual data to store. + if (!isEmptyRecord(getContext(), RetTy, true)) { + // If the value is offset in memory, apply the offset now. + Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI); + CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this); + } return convertTempToRValue(DestPtr, RetTy, SourceLocation()); } diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index eaaf10c4eec6..65a7d8e83288 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -383,6 +383,35 @@ void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, const TargetOptions &TargetOpts, bool WillInternalize); +enum class FnInfoOpts { + None = 0, + IsInstanceMethod = 1 << 0, + IsChainCall = 1 << 1, + IsDelegateCall = 1 << 2, +}; + +inline FnInfoOpts operator|(FnInfoOpts A, FnInfoOpts B) { + return static_cast( + static_cast>(A) | + static_cast>(B)); +} + +inline FnInfoOpts operator&(FnInfoOpts A, FnInfoOpts B) { + return static_cast( + static_cast>(A) & + static_cast>(B)); +} + +inline FnInfoOpts operator|=(FnInfoOpts A, FnInfoOpts B) { + A = A | B; + return A; +} + +inline FnInfoOpts operator&=(FnInfoOpts A, FnInfoOpts B) { + A = A & B; + return A; +} + } // end namespace CodeGen } // end namespace clang diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 93e7b54fca04..6ef7d12372d0 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2927,14 +2927,16 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( } void CodeGenFunction::EmitForwardingCallToLambda( - const CXXMethodDecl *callOperator, - CallArgList &callArgs) { + const CXXMethodDecl *callOperator, CallArgList &callArgs, + const CGFunctionInfo *calleeFnInfo, llvm::Constant *calleePtr) { // Get the address of the call operator. - const CGFunctionInfo &calleeFnInfo = - CGM.getTypes().arrangeCXXMethodDeclaration(callOperator); - llvm::Constant *calleePtr = - CGM.GetAddrOfFunction(GlobalDecl(callOperator), - CGM.getTypes().GetFunctionType(calleeFnInfo)); + if (!calleeFnInfo) + calleeFnInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(callOperator); + + if (!calleePtr) + calleePtr = + CGM.GetAddrOfFunction(GlobalDecl(callOperator), + CGM.getTypes().GetFunctionType(*calleeFnInfo)); // Prepare the return slot. 
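The CGCall.cpp hunk above stops storing a returned record when isEmptyRecord says it carries no data, and the earlier ABIInfoImpl change lets that query treat fields as if [[no_unique_address]] had emptied them. A minimal sketch of the kind of type this concerns, with illustrative names:

    struct Tag {};                       // empty record: nothing to load or store

    struct Slot {
      [[no_unique_address]] Tag t;       // allowed to share storage with `value`
      int value = 0;
    };

    // Returning the empty record carries no data either, so the coerced store
    // for such a return value can simply be omitted.
    Tag make_tag() { return Tag{}; }

    // On Itanium-ABI targets Slot is typically the size of a plain int because
    // the empty member overlaps `value`; MSVC keeps the padding byte unless the
    // [[msvc::no_unique_address]] spelling is used.
    static_assert(sizeof(Tag) >= 1, "even an empty record has nonzero size");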
const FunctionProtoType *FPT = @@ -2942,8 +2944,8 @@ void CodeGenFunction::EmitForwardingCallToLambda( QualType resultType = FPT->getReturnType(); ReturnValueSlot returnSlot; if (!resultType->isVoidType() && - calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && - !hasScalarEvaluationKind(calleeFnInfo.getReturnType())) + calleeFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && + !hasScalarEvaluationKind(calleeFnInfo->getReturnType())) returnSlot = ReturnValueSlot(ReturnValue, resultType.isVolatileQualified(), /*IsUnused=*/false, /*IsExternallyDestructed=*/true); @@ -2954,7 +2956,7 @@ void CodeGenFunction::EmitForwardingCallToLambda( // Now emit our call. auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator)); - RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs); + RValue RV = EmitCall(*calleeFnInfo, callee, returnSlot, callArgs); // If necessary, copy the returned value into the slot. if (!resultType->isVoidType() && returnSlot.isNull()) { @@ -2996,7 +2998,15 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { EmitForwardingCallToLambda(CallOp, CallArgs); } -void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { +void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { + if (MD->isVariadic()) { + // FIXME: Making this work correctly is nasty because it requires either + // cloning the body of the call operator or making the call operator + // forward. + CGM.ErrorUnsupported(MD, "lambda conversion to variadic function"); + return; + } + const CXXRecordDecl *Lambda = MD->getParent(); // Start building arguments for forwarding call @@ -3007,10 +3017,16 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { Address ThisPtr = CreateMemTemp(LambdaType, "unused.capture"); CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); - // Add the rest of the parameters. + EmitLambdaDelegatingInvokeBody(MD, CallArgs); +} + +void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD, + CallArgList &CallArgs) { + // Add the rest of the forwarded parameters. for (auto *Param : MD->parameters()) EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc()); + const CXXRecordDecl *Lambda = MD->getParent(); const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); // For a generic lambda, find the corresponding call operator specialization // to which the call to the static-invoker shall be forwarded. @@ -3024,10 +3040,21 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { assert(CorrespondingCallOpSpecialization); CallOp = cast(CorrespondingCallOpSpecialization); } + + // Special lambda forwarding when there are inalloca parameters. + if (hasInAllocaArg(MD)) { + const CGFunctionInfo *ImplFnInfo = nullptr; + llvm::Function *ImplFn = nullptr; + EmitLambdaInAllocaImplFn(CallOp, &ImplFnInfo, &ImplFn); + + EmitForwardingCallToLambda(CallOp, CallArgs, ImplFnInfo, ImplFn); + return; + } + EmitForwardingCallToLambda(CallOp, CallArgs); } -void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { +void CodeGenFunction::EmitLambdaInAllocaCallOpBody(const CXXMethodDecl *MD) { if (MD->isVariadic()) { // FIXME: Making this work correctly is nasty because it requires either // cloning the body of the call operator or making the call operator forward. @@ -3035,5 +3062,56 @@ void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { return; } - EmitLambdaDelegatingInvokeBody(MD); + // Forward %this argument. 
+ CallArgList CallArgs; + QualType LambdaType = getContext().getRecordType(MD->getParent()); + QualType ThisType = getContext().getPointerType(LambdaType); + llvm::Value *ThisArg = CurFn->getArg(0); + CallArgs.add(RValue::get(ThisArg), ThisType); + + EmitLambdaDelegatingInvokeBody(MD, CallArgs); +} + +void CodeGenFunction::EmitLambdaInAllocaImplFn( + const CXXMethodDecl *CallOp, const CGFunctionInfo **ImplFnInfo, + llvm::Function **ImplFn) { + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeCXXMethodDeclaration(CallOp); + llvm::Function *CallOpFn = + cast(CGM.GetAddrOfFunction(GlobalDecl(CallOp))); + + // Emit function containing the original call op body. __invoke will delegate + // to this function. + SmallVector ArgTypes; + for (auto I = FnInfo.arg_begin(); I != FnInfo.arg_end(); ++I) + ArgTypes.push_back(I->type); + *ImplFnInfo = &CGM.getTypes().arrangeLLVMFunctionInfo( + FnInfo.getReturnType(), FnInfoOpts::IsDelegateCall, ArgTypes, + FnInfo.getExtInfo(), {}, FnInfo.getRequiredArgs()); + + // Create mangled name as if this was a method named __impl. If for some + // reason the name doesn't look as expected then just tack __impl to the + // front. + // TODO: Use the name mangler to produce the right name instead of using + // string replacement. + StringRef CallOpName = CallOpFn->getName(); + std::string ImplName; + if (size_t Pos = CallOpName.find_first_of("getParent()->getFunction(ImplName); + if (!Fn) { + Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(**ImplFnInfo), + llvm::GlobalValue::InternalLinkage, ImplName, + CGM.getModule()); + CGM.SetInternalFunctionAttributes(CallOp, Fn, **ImplFnInfo); + + const GlobalDecl &GD = GlobalDecl(CallOp); + const auto *D = cast(GD.getDecl()); + CodeGenFunction(CGM).GenerateCode(GD, Fn, **ImplFnInfo); + CGM.SetLLVMFunctionAttributesForDefinition(D, Fn); + } + *ImplFn = Fn; } diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 8437cda79beb..810ae7d51ec1 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -139,6 +139,36 @@ static bool memberCallExpressionCanThrow(const Expr *E) { return true; } +/// Return true when the coroutine handle may escape from the await-suspend +/// (`awaiter.await_suspend(std::coroutine_handle)` expression). +/// Return false only when the coroutine wouldn't escape in the await-suspend +/// for sure. +/// +/// While it is always safe to return true, return falses can bring better +/// performances. +/// +/// See https://github.com/llvm/llvm-project/issues/56301 and +/// https://reviews.llvm.org/D157070 for the example and the full discussion. +/// +/// FIXME: It will be much better to perform such analysis in the middle end. +/// See the comments in `CodeGenFunction::EmitCall` for example. +static bool MayCoroHandleEscape(CoroutineSuspendExpr const &S) { + CXXRecordDecl *Awaiter = + S.getCommonExpr()->getType().getNonReferenceType()->getAsCXXRecordDecl(); + + // Return true conservatively if the awaiter type is not a record type. + if (!Awaiter) + return true; + + // In case the awaiter type is empty, the suspend wouldn't leak the coroutine + // handle. + // + // TODO: We can improve this by looking into the implementation of + // await-suspend and see if the coroutine handle is passed to foreign + // functions. 
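MayCoroHandleEscape above conservatively assumes the handle escapes whenever the awaiter has any fields, and only the field-less case keeps the await_suspend call inlinable. A C++20 sketch of the two awaiter shapes being distinguished, with illustrative names:

    #include <coroutine>

    // Field-less awaiter: await_suspend has nowhere inside the awaiter to stash
    // the handle, so the heuristic treats the handle as non-escaping.
    struct TrivialAwaiter {
      bool await_ready() const noexcept { return false; }
      void await_suspend(std::coroutine_handle<>) const noexcept {}
      void await_resume() const noexcept {}
    };

    // Stateful awaiter: await_suspend publishes the handle through a member, so
    // another thread could resume (or even destroy) the coroutine before
    // await_suspend returns; the call site is therefore marked noinline.
    struct PublishingAwaiter {
      std::coroutine_handle<> *slot = nullptr;
      bool await_ready() const noexcept { return false; }
      void await_suspend(std::coroutine_handle<> h) noexcept { *slot = h; }
      void await_resume() const noexcept {}
    };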
+ return !Awaiter->field_empty(); +} + // Emit suspend expression which roughly looks like: // // auto && x = CommonExpr(); @@ -199,8 +229,11 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); CGF.CurCoro.InSuspendBlock = true; + CGF.CurCoro.MayCoroHandleEscape = MayCoroHandleEscape(S); auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); CGF.CurCoro.InSuspendBlock = false; + CGF.CurCoro.MayCoroHandleEscape = false; + if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { // Veto suspension if requested by bool returning await_suspend. BasicBlock *RealSuspendBlock = diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index f049a682cfed..d8eb2aecb87a 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -391,12 +391,14 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { SourceManager &SM = CGM.getContext().getSourceManager(); StringRef FileName; FileID FID; + std::optional> CSInfo; if (Loc.isInvalid()) { // The DIFile used by the CU is distinct from the main source file. Call // createFile() below for canonicalization if the source file was specified // with an absolute path. FileName = TheCU->getFile()->getFilename(); + CSInfo = TheCU->getFile()->getChecksum(); } else { PresumedLoc PLoc = SM.getPresumedLoc(Loc); FileName = PLoc.getFilename(); @@ -417,13 +419,14 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { return cast(V); } + // Put Checksum at a scope where it will persist past the createFile call. SmallString<64> Checksum; - - std::optional CSKind = + if (!CSInfo) { + std::optional CSKind = computeChecksum(FID, Checksum); - std::optional> CSInfo; - if (CSKind) - CSInfo.emplace(*CSKind, Checksum); + if (CSKind) + CSInfo.emplace(*CSKind, Checksum); + } return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc))); } diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 1fd08626358b..58ee6dd64c4f 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -148,7 +148,7 @@ class CGDebugInfo { llvm::BumpPtrAllocator DebugInfoNames; StringRef CWDName; - llvm::StringMap DIFileCache; + llvm::DenseMap DIFileCache; llvm::DenseMap SPCache; /// Cache declarations relevant to DW_TAG_imported_declarations (C++ /// using declarations and global alias variables) that aren't covered diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index b0d6eb05acc2..d99dcdba8e43 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -202,7 +202,7 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { return; llvm::GlobalValue::LinkageTypes Linkage = - CGM.getLLVMLinkageVarDefinition(&D, /*IsConstant=*/false); + CGM.getLLVMLinkageVarDefinition(&D); // FIXME: We need to force the emission/use of a guard variable for // some variables even if we can constant-evaluate them because diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index be8fb6c274db..a9c88110d6f0 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -279,8 +279,8 @@ llvm::Function *CodeGenFunction::createTLSAtExitStub( } const CGFunctionInfo &FI = CGM.getTypes().arrangeLLVMFunctionInfo( - getContext().IntTy, /*instanceMethod=*/false, /*chainCall=*/false, - {getContext().IntTy}, FunctionType::ExtInfo(), {}, RequiredArgs::All); + getContext().IntTy, FnInfoOpts::None, 
{getContext().IntTy}, + FunctionType::ExtInfo(), {}, RequiredArgs::All); // Get the stub function type, int(*)(int,...). llvm::FunctionType *StubTy = diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ed6095f7cfeb..fc16b3133f73 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2692,8 +2692,7 @@ static LValue EmitGlobalNamedRegister(const VarDecl *VD, CodeGenModule &CGM) { /// this context. static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF, const DeclRefExpr *E, - const VarDecl *VD, - bool IsConstant) { + const VarDecl *VD) { // For a variable declared in an enclosing scope, do not emit a spurious // reference even if we have a capture, as that will emit an unwarranted // reference to our capture state, and will likely generate worse code than @@ -2726,7 +2725,7 @@ static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF, // We can emit a spurious reference only if the linkage implies that we'll // be emitting a non-interposable symbol that will be retained until link // time. - switch (CGF.CGM.getLLVMLinkageVarDefinition(VD, IsConstant)) { + switch (CGF.CGM.getLLVMLinkageVarDefinition(VD)) { case llvm::GlobalValue::ExternalLinkage: case llvm::GlobalValue::LinkOnceODRLinkage: case llvm::GlobalValue::WeakODRLinkage: @@ -2757,7 +2756,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // constant value directly instead. if (E->isNonOdrUse() == NOUR_Constant && (VD->getType()->isReferenceType() || - !canEmitSpuriousReferenceToVariable(*this, E, VD, true))) { + !canEmitSpuriousReferenceToVariable(*this, E, VD))) { VD->getAnyInitializer(VD); llvm::Constant *Val = ConstantEmitter(*this).emitAbstract( E->getLocation(), *VD->evaluateValue(), VD->getType()); @@ -2859,7 +2858,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // some reason; most likely, because it's in an outer function. 
} else if (VD->isStaticLocal()) { llvm::Constant *var = CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)); + *VD, CGM.getLLVMLinkageVarDefinition(VD)); addr = Address( var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 353ee56839f3..942daa4aa577 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1918,7 +1918,7 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { if (VD->isLocalVarDecl()) { return CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)); + *VD, CGM.getLLVMLinkageVarDefinition(VD)); } } } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a52ec8909b12..124eade4617f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1667,7 +1667,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; auto LinkageForVariable = [&VD, this]() { - return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + return CGM.getLLVMLinkageVarDefinition(VD); }; std::vector GeneratedRefs; @@ -10151,6 +10151,13 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, std::optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + + // If this is an 'extern' declaration we defer to the canonical definition and + // do not emit an offloading entry. + if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link && + VD->hasExternalStorage()) + return; + if (!Res) { if (CGM.getLangOpts().OpenMPIsTargetDevice) { // Register non-target variables being emitted in device code (debug info @@ -10163,7 +10170,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; auto LinkageForVariable = [&VD, this]() { - return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + return CGM.getLLVMLinkageVarDefinition(VD); }; std::vector GeneratedRefs; diff --git a/clang/lib/CodeGen/CodeGenABITypes.cpp b/clang/lib/CodeGen/CodeGenABITypes.cpp index d3a16a1d5acc..a6073e1188d6 100644 --- a/clang/lib/CodeGen/CodeGenABITypes.cpp +++ b/clang/lib/CodeGen/CodeGenABITypes.cpp @@ -65,9 +65,8 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, ArrayRef argTypes, FunctionType::ExtInfo info, RequiredArgs args) { - return CGM.getTypes().arrangeLLVMFunctionInfo( - returnType, /*instanceMethod=*/false, /*chainCall=*/false, argTypes, - info, {}, args); + return CGM.getTypes().arrangeLLVMFunctionInfo(returnType, FnInfoOpts::None, + argTypes, info, {}, args); } ImplicitCXXConstructorArgs diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index fab70b66d1d9..7ef893cb1a2d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -572,7 +572,7 @@ llvm::ConstantInt * CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const { // Remove any (C++17) exception specifications, to allow calling e.g. a // noexcept function through a non-noexcept pointer. 
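The getUBSanFunctionTypeHash hunk above keeps stripping C++17 exception specifications before hashing the function type, now guarded by a direct isFunctionNoProtoType() test. A minimal sketch of the case the comment describes (illustrative only; the point is that -fsanitize=function must compute the same hash on both sides):

  // The callee is emitted with a type hash derived from its function type.
  void callee() noexcept {}

  int main() {
    void (*fp)() = callee;  // pointer to a *non*-noexcept function type
    fp();                   // the call-site hash comes from fp's type; dropping
                            // the exception specification on both sides keeps
                            // the two hashes equal
  }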
- if (!isa(Ty)) + if (!Ty->isFunctionNoProtoType()) Ty = getContext().getFunctionTypeWithExceptionSpec(Ty, EST_None); std::string Mangled; llvm::raw_string_ostream Out(Mangled); @@ -683,6 +683,19 @@ static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) { return true; } +bool CodeGenFunction::isInAllocaArgument(CGCXXABI &ABI, QualType Ty) { + const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory; +} + +bool CodeGenFunction::hasInAllocaArg(const CXXMethodDecl *MD) { + return getTarget().getTriple().getArch() == llvm::Triple::x86 && + getTarget().getCXXABI().isMicrosoft() && + llvm::any_of(MD->parameters(), [&](ParmVarDecl *P) { + return isInAllocaArgument(CGM.getCXXABI(), P->getType()); + }); +} + /// Return the UBSan prologue signature for \p FD if one is available. static llvm::Constant *getPrologueSignature(CodeGenModule &CGM, const FunctionDecl *FD) { @@ -1447,6 +1460,17 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // The lambda static invoker function is special, because it forwards or // clones the body of the function call operator (but is actually static). EmitLambdaStaticInvokeBody(cast(FD)); + } else if (isa(FD) && + isLambdaCallOperator(cast(FD)) && + !FnInfo.isDelegateCall() && + cast(FD)->getParent()->getLambdaStaticInvoker() && + hasInAllocaArg(cast(FD))) { + // If emitting a lambda with static invoker on X86 Windows, change + // the call operator body. + // Make sure that this is a call operator with an inalloca arg and check + // for delegate call to make sure this is the original call op and not the + // new forwarding function for the static invoker. + EmitLambdaInAllocaCallOpBody(cast(FD)); } else if (FD->isDefaulted() && isa(FD) && (cast(FD)->isCopyAssignmentOperator() || cast(FD)->isMoveAssignmentOperator())) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 409f48a04906..28ec2b970072 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -334,6 +334,7 @@ class CodeGenFunction : public CodeGenTypeCache { struct CGCoroInfo { std::unique_ptr Data; bool InSuspendBlock = false; + bool MayCoroHandleEscape = false; CGCoroInfo(); ~CGCoroInfo(); }; @@ -347,6 +348,10 @@ class CodeGenFunction : public CodeGenTypeCache { return isCoroutine() && CurCoro.InSuspendBlock; } + bool mayCoroHandleEscape() const { + return isCoroutine() && CurCoro.MayCoroHandleEscape; + } + /// CurGD - The GlobalDecl for the current function being compiled. GlobalDecl CurGD; @@ -1963,6 +1968,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// Check if the return value of this function requires sanitization. 
bool requiresReturnValueCheck() const; + bool isInAllocaArgument(CGCXXABI &ABI, QualType Ty); + bool hasInAllocaArg(const CXXMethodDecl *MD); + llvm::BasicBlock *TerminateLandingPad = nullptr; llvm::BasicBlock *TerminateHandler = nullptr; llvm::SmallVector TrapBBs; @@ -2227,10 +2235,17 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S); void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, - CallArgList &CallArgs); + CallArgList &CallArgs, + const CGFunctionInfo *CallOpFnInfo = nullptr, + llvm::Constant *CallOpFn = nullptr); void EmitLambdaBlockInvokeBody(); - void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD); void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD); + void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD, + CallArgList &CallArgs); + void EmitLambdaInAllocaImplFn(const CXXMethodDecl *CallOp, + const CGFunctionInfo **ImplFnInfo, + llvm::Function **ImplFn); + void EmitLambdaInAllocaCallOpBody(const CXXMethodDecl *MD); void EmitLambdaVLACapture(const VariableArrayType *VAT, LValue LV) { EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 07a9dec12f6f..a3506df7d4e5 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1974,7 +1974,7 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { return llvm::GlobalValue::InternalLinkage; } - return getLLVMLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false); + return getLLVMLinkageForDeclarator(D, Linkage); } llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { @@ -3605,6 +3605,13 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Emit declaration of the must-be-emitted declare target variable. if (std::optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { + + // If this variable has external storage and doesn't require special + // link handling we defer to its canonical definition. + if (VD->hasExternalStorage() && + Res != OMPDeclareTargetDeclAttr::MT_Link) + return; + bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); if ((*Res == OMPDeclareTargetDeclAttr::MT_To || @@ -3638,6 +3645,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) { // Emit the definition if it can't be deferred. EmitGlobalDefinition(GD); + addEmittedDeferredDecl(GD); return; } @@ -3657,7 +3665,6 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // The value must be emitted, but cannot be emitted eagerly. assert(!MayBeEmittedEagerly(Global)); addDeferredDeclToEmit(GD); - EmittedDeferredDecls[MangledName] = GD; } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into @@ -4397,7 +4404,6 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). addDeferredDeclToEmit(DDI->second); - EmittedDeferredDecls[DDI->first] = DDI->second; DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're @@ -4678,7 +4684,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). 
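The EmittedDeferredDecls changes in this file (continued just below) move the bookkeeping out of the lookup paths and into addEmittedDeferredDecl, which now records a declaration for re-emission only under incremental extensions and only for internal, linkonce, or weak linkage. The scenario this serves, roughly (clang-repl style partial translation units; illustrative):

  // PTU 1: an inline function gets linkonce_odr linkage when emitted.
  inline int counter() { static int n = 0; return ++n; }

  // PTU 2: a later increment references counter(); since each PTU becomes its
  // own LLVM module and discardable or internal definitions from an earlier
  // module cannot be relied on, CodeGen re-emits the definition here instead
  // of leaving an unresolved external reference.
  int x = counter();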
addDeferredDeclToEmit(DDI->second); - EmittedDeferredDecls[DDI->first] = DDI->second; DeferredDecls.erase(DDI); } @@ -5221,8 +5226,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, AddGlobalAnnotations(D, GV); // Set the llvm linkage type as appropriate. - llvm::GlobalValue::LinkageTypes Linkage = - getLLVMLinkageVarDefinition(D, GV->isConstant()); + llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D); // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" @@ -5415,8 +5419,9 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, return false; } -llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( - const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable) { +llvm::GlobalValue::LinkageTypes +CodeGenModule::getLLVMLinkageForDeclarator(const DeclaratorDecl *D, + GVALinkage Linkage) { if (Linkage == GVA_Internal) return llvm::Function::InternalLinkage; @@ -5486,10 +5491,10 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( return llvm::GlobalVariable::ExternalLinkage; } -llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition( - const VarDecl *VD, bool IsConstant) { +llvm::GlobalValue::LinkageTypes +CodeGenModule::getLLVMLinkageVarDefinition(const VarDecl *VD) { GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD); - return getLLVMLinkageForDeclarator(VD, Linkage, IsConstant); + return getLLVMLinkageForDeclarator(VD, Linkage); } /// Replace the uses of a function that was declared with a non-proto type. @@ -5701,7 +5706,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, /*D=*/nullptr); if (const auto *VD = dyn_cast(GD.getDecl())) - LT = getLLVMLinkageVarDefinition(VD, D->getType().isConstQualified()); + LT = getLLVMLinkageVarDefinition(VD); else LT = getFunctionLinkage(GD); } @@ -6332,8 +6337,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( } // Create a global variable for this lifetime-extended temporary. - llvm::GlobalValue::LinkageTypes Linkage = - getLLVMLinkageVarDefinition(VD, Constant); + llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(VD); if (Linkage == llvm::GlobalVariable::ExternalLinkage) { const VarDecl *InitVD; if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) && diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 05cb217e2bee..dd97808c7775 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -361,10 +361,19 @@ class CodeGenModule : public CodeGenTypeCache { llvm::DenseMap EmittedDeferredDecls; void addEmittedDeferredDecl(GlobalDecl GD) { - if (!llvm::isa(GD.getDecl())) + // Reemission is only needed in incremental mode. + if (!Context.getLangOpts().IncrementalExtensions) return; - llvm::GlobalVariable::LinkageTypes L = getFunctionLinkage(GD); - if (llvm::GlobalValue::isLinkOnceLinkage(L) || + + // Assume a linkage by default that does not need reemission. 
+ auto L = llvm::GlobalValue::ExternalLinkage; + if (llvm::isa(GD.getDecl())) + L = getFunctionLinkage(GD); + else if (auto *VD = llvm::dyn_cast(GD.getDecl())) + L = getLLVMLinkageVarDefinition(VD); + + if (llvm::GlobalValue::isInternalLinkage(L) || + llvm::GlobalValue::isLinkOnceLinkage(L) || llvm::GlobalValue::isWeakLinkage(L)) { EmittedDeferredDecls[getMangledName(GD)] = GD; } @@ -1321,12 +1330,11 @@ class CodeGenModule : public CodeGenTypeCache { /// Returns LLVM linkage for a declarator. llvm::GlobalValue::LinkageTypes - getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage, - bool IsConstantVariable); + getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage); /// Returns LLVM linkage for a declarator. llvm::GlobalValue::LinkageTypes - getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant); + getLLVMLinkageVarDefinition(const VarDecl *VD); /// Emit all the global annotations. void EmitGlobalAnnotations(); diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 9088f77b95c3..a0e846d9a751 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -252,13 +252,11 @@ class CodeGenTypes { /// this. /// /// \param argTypes - must all actually be canonical as params - const CGFunctionInfo &arrangeLLVMFunctionInfo(CanQualType returnType, - bool instanceMethod, - bool chainCall, - ArrayRef argTypes, - FunctionType::ExtInfo info, - ArrayRef paramInfos, - RequiredArgs args); + const CGFunctionInfo &arrangeLLVMFunctionInfo( + CanQualType returnType, FnInfoOpts opts, ArrayRef argTypes, + FunctionType::ExtInfo info, + ArrayRef paramInfos, + RequiredArgs args); /// Compute a new LLVM record layout object for the given record. std::unique_ptr ComputeRecordLayout(const RecordDecl *D, diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 79a926cb9edd..ede9efb019ce 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2839,7 +2839,7 @@ static bool isThreadWrapperReplaceable(const VarDecl *VD, static llvm::GlobalValue::LinkageTypes getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) { llvm::GlobalValue::LinkageTypes VarLinkage = - CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + CGM.getLLVMLinkageVarDefinition(VD); // For internal linkage variables, we don't need an external or weak wrapper. if (llvm::GlobalValue::isLocalLinkage(VarLinkage)) diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index a692abaf3b75..a14efbdba76b 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1379,8 +1379,7 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage( case Dtor_Base: // The base destructor most closely tracks the user-declared constructor, so // we delegate back to the normal declarator case. - return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*IsConstantVariable=*/false); + return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage); case Dtor_Complete: // The complete destructor is like an inline function, but it may be // imported and therefore must be exported as well. 
This requires changing diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp index 6391a8aeaa67..7483bf6d6d1e 100644 --- a/clang/lib/CodeGen/Targets/LoongArch.cpp +++ b/clang/lib/CodeGen/Targets/LoongArch.cpp @@ -148,6 +148,13 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { uint64_t ArraySize = ATy->getSize().getZExtValue(); QualType EltTy = ATy->getElementType(); + // Non-zero-length arrays of empty records make the struct ineligible to be + // passed via FARs in C++. + if (const auto *RTy = EltTy->getAs()) { + if (ArraySize != 0 && isa(RTy->getDecl()) && + isEmptyRecord(getContext(), EltTy, true, true)) + return false; + } CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); for (uint64_t i = 0; i < ArraySize; ++i) { if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, @@ -163,7 +170,7 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( // copy constructor are not eligible for the FP calling convention. if (getRecordArgABI(Ty, CGT.getCXXABI())) return false; - if (isEmptyRecord(getContext(), Ty, true)) + if (isEmptyRecord(getContext(), Ty, true, true)) return true; const RecordDecl *RD = RTy->getDecl(); // Unions aren't eligible unless they're empty (which is caught above). @@ -222,6 +229,8 @@ bool LoongArchABIInfo::detectFARsEligibleStruct( if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off)) return false; + if (!Field1Ty) + return false; // Not really a candidate if we have a single int but no float. if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) return false; diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index b6d8ae462675..b12c3025f607 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -8,7 +8,6 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" -#include "llvm/TargetParser/RISCVTargetParser.h" using namespace clang; using namespace clang::CodeGen; @@ -152,6 +151,13 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { uint64_t ArraySize = ATy->getSize().getZExtValue(); QualType EltTy = ATy->getElementType(); + // Non-zero-length arrays of empty records make the struct ineligible for + // the FP calling convention in C++. + if (const auto *RTy = EltTy->getAs()) { + if (ArraySize != 0 && isa(RTy->getDecl()) && + isEmptyRecord(getContext(), EltTy, true, true)) + return false; + } CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); for (uint64_t i = 0; i < ArraySize; ++i) { bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, @@ -168,7 +174,7 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, // copy constructor are not eligible for the FP calling convention. if (getRecordArgABI(Ty, CGT.getCXXABI())) return false; - if (isEmptyRecord(getContext(), Ty, true)) + if (isEmptyRecord(getContext(), Ty, true, true)) return true; const RecordDecl *RD = RTy->getDecl(); // Unions aren't eligible unless they're empty (which is caught above). 
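The LoongArch and RISC-V hunks above add the same guard: in C++ an array of empty records still occupies storage, so a struct containing one can no longer be treated as "at most two floating-point/integer members" for the FP-register calling convention. Roughly the shape the new check rejects (illustrative):

  struct Empty {};
  struct S {
    Empty pad[4];   // in C++ each element is one byte, so this is real storage
    double d;
  };
  // With the new check, S is classified up front as ineligible for the
  // two-register FP convention and falls back to the normal argument rules.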
@@ -238,6 +244,8 @@ bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, NeededArgFPRs = 0; bool IsCandidate = detectFPCCEligibleStructHelper( Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); + if (!Field1Ty) + return false; // Not really a candidate if we have a single int but no float. if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) return false; @@ -315,11 +323,15 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const { assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); - const auto *BT = VT->getElementType()->castAs(); - unsigned EltSize = getContext().getTypeSize(BT); + auto VScale = + getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts()); + // The MinNumElts is simplified from equation: + // NumElts / VScale = + // (EltSize * NumElts / (VScale * RVVBitsPerBlock)) + // * (RVVBitsPerBlock / EltSize) llvm::ScalableVectorType *ResType = - llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()), - llvm::RISCV::RVVBitsPerBlock / EltSize); + llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()), + VT->getNumElements() / VScale->first); return ABIArgInfo::getDirect(ResType); } diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 31679d899a44..9f5c3258d65c 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -140,7 +140,8 @@ class X86_32ABIInfo : public ABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; - ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; + ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State, + bool isDelegateCall) const; /// Updates the number of available free registers, returns /// true if any registers were allocated. @@ -737,8 +738,8 @@ void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) c } } -ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, - CCState &State) const { +ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State, + bool isDelegateCall) const { // FIXME: Set alignment on indirect arguments. bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall; bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall; @@ -753,6 +754,12 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) { return getIndirectResult(Ty, false, State); + } else if (isDelegateCall) { + // Avoid having different alignments on delegate call args by always + // setting the alignment to 4, which is what we do for inallocas. + ABIArgInfo Res = getIndirectResult(Ty, false, State); + Res.setIndirectAlign(CharUnits::fromQuantity(4)); + return Res; } else if (RAA == CGCXXABI::RAA_DirectInMemory) { // The field index doesn't matter, we'll fix it up later. 
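As a quick check of the MinNumElts simplification in the coerceVLSVector hunk above (numbers are illustrative): take a fixed-length vector of 8 x i32 built with -mrvv-vector-bits=256, so with RVVBitsPerBlock = 64 the vscale range is exactly 4. Then

  NumElts / VScale = 8 / 4 = 2

  (EltSize * NumElts / (VScale * RVVBitsPerBlock)) * (RVVBitsPerBlock / EltSize)
      = (32 * 8 / (4 * 64)) * (64 / 32)
      = 1 * 2
      = 2

Both forms give <vscale x 2 x i32>, which holds the original 8 elements when vscale is 4.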
return ABIArgInfo::getInAlloca(/*FieldIndex=*/0); @@ -940,7 +947,8 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { if (State.IsPreassigned.test(I)) continue; - Args[I].info = classifyArgumentType(Args[I].type, State); + Args[I].info = + classifyArgumentType(Args[I].type, State, FI.isDelegateCall()); UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca); } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 488350169efa..f6ea4d0b4366 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4741,13 +4741,8 @@ Action *Driver::ConstructPhaseAction( } case phases::Backend: { if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { - types::ID Output; - if (Args.hasArg(options::OPT_S)) - Output = types::TY_LTO_IR; - else if (Args.hasArg(options::OPT_ffat_lto_objects)) - Output = types::TY_PP_Asm; - else - Output = types::TY_LTO_BC; + types::ID Output = + Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction(Input, Output); } if (isUsingLTO(/* IsOffload */ true) && diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index c3ce13f93464..12fe55be9113 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -37,6 +37,8 @@ static const SanitizerMask NeedsUbsanCxxRt = SanitizerKind::Vptr | SanitizerKind::CFI; static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr; static const SanitizerMask NotAllowedWithMinimalRuntime = SanitizerKind::Vptr; +static const SanitizerMask NotAllowedWithExecuteOnly = + SanitizerKind::Function | SanitizerKind::KCFI; static const SanitizerMask RequiresPIE = SanitizerKind::DataFlow | SanitizerKind::Scudo; static const SanitizerMask NeedsUnwindTables = @@ -141,6 +143,16 @@ static std::string describeSanitizeArg(const llvm::opt::Arg *A, /// Sanitizers set. static std::string toString(const clang::SanitizerSet &Sanitizers); +/// Return true if an execute-only target disallows data access to code +/// sections. +static bool isExecuteOnlyTarget(const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) { + if (Triple.isPS5()) + return true; + return Args.hasFlagNoClaim(options::OPT_mexecute_only, + options::OPT_mno_execute_only, false); +} + static void validateSpecialCaseListFormat(const Driver &D, std::vector &SCLFiles, unsigned MalformedSCLErrorDiagID, @@ -395,6 +407,22 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, DiagnosedKinds |= SanitizerKind::Function; } } + // -fsanitize=function and -fsanitize=kcfi instrument indirect function + // calls to load a type hash before the function label. Therefore, an + // execute-only target doesn't support the function and kcfi sanitizers. + const llvm::Triple &Triple = TC.getTriple(); + if (isExecuteOnlyTarget(Triple, Args)) { + if (SanitizerMask KindsToDiagnose = + Add & NotAllowedWithExecuteOnly & ~DiagnosedKinds) { + if (DiagnoseErrors) { + std::string Desc = describeSanitizeArg(Arg, KindsToDiagnose); + D.Diag(diag::err_drv_argument_not_allowed_with) + << Desc << Triple.str(); + } + DiagnosedKinds |= KindsToDiagnose; + } + Add &= ~NotAllowedWithExecuteOnly; + } // FIXME: Make CFI on member function calls compatible with cross-DSO CFI. // There are currently two problems: @@ -457,6 +485,10 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, if (MinimalRuntime) { Add &= ~NotAllowedWithMinimalRuntime; } + // NotAllowedWithExecuteOnly is silently discarded on an execute-only + // target if implicitly enabled through group expansion. 
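The SanitizerArgs changes above reject -fsanitize=function and -fsanitize=kcfi on execute-only targets because both schemes place a type hash in front of the function entry, and verifying it at an indirect call means loading data from the code section. A rough pseudo-C picture of the caller-side check (offsets and names are illustrative, not the exact emitted sequence):

  typedef void fn_t(void);
  void call_checked(fn_t *fp) {
    unsigned expected = 0x89ab01u;                 // hash of the expected type
    unsigned found = *((const unsigned *)fp - 1);  // a data read from .text,
                                                   // which is exactly what
                                                   // execute-only memory forbids
    if (found != expected)
      __builtin_trap();
    fp();
  }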
+ if (isExecuteOnlyTarget(Triple, Args)) + Add &= ~NotAllowedWithExecuteOnly; if (CfiCrossDso) Add &= ~SanitizerKind::CFIMFCall; Add &= Supported; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index d60fdbc17968..8dafc3d481c2 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -427,6 +427,12 @@ ToolChain::getDefaultUnwindTableLevel(const ArgList &Args) const { return UnwindTableLevel::None; } +unsigned ToolChain::GetDefaultDwarfVersion() const { + // TODO: Remove the RISC-V special case when R_RISCV_SET_ULEB128 linker + // support becomes more widely available. + return getTriple().isRISCV() ? 4 : 5; +} + Tool *ToolChain::getClang() const { if (!Clang) Clang.reset(new tools::Clang(*this, useIntegratedBackend())); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 6cbb06b9a91f..65925e9ed610 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -127,31 +127,11 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features) { - StringRef ArchName; - if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { + std::string ArchName; + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) ArchName = A->getValue(); - - // Handle -march=native. - if (ArchName == "native") { - ArchName = llvm::sys::getHostCPUName(); - if (ArchName == "generic") - ArchName = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); - } - - if (!llvm::LoongArch::isValidArchName(ArchName)) { - D.Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args); - return; - } - } - - // Select a default arch name. - if (ArchName.empty()) - ArchName = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); - - if (!ArchName.empty()) { - llvm::LoongArch::getArchFeatures(ArchName, Features); - llvm::LoongArch::setArch(ArchName); - } + ArchName = postProcessTargetCPUString(ArchName, Triple); + llvm::LoongArch::getArchFeatures(ArchName, Features); // Select floating-point features determined by -mdouble-float, // -msingle-float, -msoft-float and -mfpu. @@ -196,3 +176,25 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) A->ignoreTargetSpecific(); } + +std::string loongarch::postProcessTargetCPUString(const std::string &CPU, + const llvm::Triple &Triple) { + std::string CPUString = CPU; + if (CPUString == "native") { + CPUString = llvm::sys::getHostCPUName(); + if (CPUString == "generic") + CPUString = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); + } + if (CPUString.empty()) + CPUString = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); + return CPUString; +} + +std::string loongarch::getLoongArchTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple) { + std::string CPU; + // If we have -march, use that. 
+ if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) + CPU = A->getValue(); + return postProcessTargetCPUString(CPU, Triple); +} diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.h b/clang/lib/Driver/ToolChains/Arch/LoongArch.h index 0084474e7ed3..d8280cd836f8 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.h +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.h @@ -23,6 +23,12 @@ void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple, StringRef getLoongArchABI(const Driver &D, const llvm::opt::ArgList &Args, const llvm::Triple &Triple); + +std::string postProcessTargetCPUString(const std::string &CPU, + const llvm::Triple &Triple); + +std::string getLoongArchTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple); } // end namespace loongarch } // end namespace tools } // end namespace driver diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 286bac2e7a2b..4383b8004143 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -118,7 +118,13 @@ std::string x86::getX86TargetCPU(const Driver &D, const ArgList &Args, void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, - std::vector &Features) { + std::vector &Features, bool ForAS) { + if (ForAS) { + // Some target-specific options are only handled in AddX86TargetArgs, which + // is not called by ClangAs::ConstructJob. Claim them here. + Args.claimAllArgs(options::OPT_mfpmath_EQ); + } + // Claim and report unsupported -mabi=. Note: we don't support "sysv_abi" or // "ms_abi" as default function attributes. if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mabi_EQ)) { @@ -267,4 +273,10 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, << A->getSpelling() << Scope; } } + + // -mno-gather, -mno-scatter support + if (Args.hasArg(options::OPT_mno_gather)) + Features.push_back("+prefer-no-gather"); + if (Args.hasArg(options::OPT_mno_scatter)) + Features.push_back("+prefer-no-scatter"); } diff --git a/clang/lib/Driver/ToolChains/Arch/X86.h b/clang/lib/Driver/ToolChains/Arch/X86.h index e07387f3ece3..762a1fa6f4d5 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.h +++ b/clang/lib/Driver/ToolChains/Arch/X86.h @@ -26,7 +26,7 @@ std::string getX86TargetCPU(const Driver &D, const llvm::opt::ArgList &Args, void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args, - std::vector &Features); + std::vector &Features, bool ForAS); } // end namespace x86 } // end namespace target diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e3fa315ffcb1..47ec36b3a8ff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1863,15 +1863,10 @@ void Clang::AddLoongArchTargetArgs(const ArgList &Args, // Handle -mtune. 
if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { - StringRef TuneCPU = A->getValue(); - if (TuneCPU == "native") { - TuneCPU = llvm::sys::getHostCPUName(); - if (TuneCPU == "generic") - TuneCPU = llvm::LoongArch::getDefaultArch(Triple.isLoongArch64()); - } + std::string TuneCPU = A->getValue(); + TuneCPU = loongarch::postProcessTargetCPUString(TuneCPU, Triple); CmdArgs.push_back("-tune-cpu"); CmdArgs.push_back(Args.MakeArgString(TuneCPU)); - llvm::LoongArch::setTuneCPU(TuneCPU); } } @@ -2068,6 +2063,12 @@ void Clang::AddPPCTargetArgs(const ArgList &Args, } else if (V == "vec-extabi") { VecExtabi = true; A->claim(); + } else if (V == "elfv1") { + ABIName = "elfv1"; + A->claim(); + } else if (V == "elfv2") { + ABIName = "elfv2"; + A->claim(); } else if (V != "altivec") // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore // the option if given as we don't have backend support for any targets @@ -7375,22 +7376,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (SplitLTOUnit) CmdArgs.push_back("-fsplit-lto-unit"); - if (Arg *A = Args.getLastArg(options::OPT_ffat_lto_objects, - options::OPT_fno_fat_lto_objects)) { - if (IsUsingLTO && A->getOption().matches(options::OPT_ffat_lto_objects)) { - assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin); - if (!Triple.isOSBinFormatELF()) { - D.Diag(diag::err_drv_unsupported_opt_for_target) - << A->getAsString(Args) << TC.getTripleString(); - } - CmdArgs.push_back(Args.MakeArgString( - Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full"))); - CmdArgs.push_back("-flto-unit"); - CmdArgs.push_back("-ffat-lto-objects"); - A->render(Args, CmdArgs); - } - } - if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel, options::OPT_fno_global_isel)) { CmdArgs.push_back("-mllvm"); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 358d7565f47c..8766d34eec53 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -474,6 +474,10 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, case llvm::Triple::wasm32: case llvm::Triple::wasm64: return std::string(getWebAssemblyTargetCPU(Args)); + + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return loongarch::getLoongArchTargetCPU(Args, T); } } @@ -524,7 +528,7 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, break; case llvm::Triple::x86: case llvm::Triple::x86_64: - x86::getX86TargetFeatures(D, Triple, Args, Features); + x86::getX86TargetFeatures(D, Triple, Args, Features, ForAS); break; case llvm::Triple::hexagon: hexagon::getHexagonTargetFeatures(D, Triple, Args, Features); @@ -617,11 +621,6 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, PluginName + Suffix, Plugin); CmdArgs.push_back(Args.MakeArgString(Twine(PluginPrefix) + Plugin)); - } else { - // Tell LLD to find and use .llvm.lto section in regular relocatable object - // files - if (Args.hasArg(options::OPT_ffat_lto_objects)) - CmdArgs.push_back("--fat-lto-objects"); } const char *PluginOptPrefix = IsOSAIX ? 
"-bplugin_opt:" : "-plugin-opt="; diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index b64fff8b14be..40038dce47d8 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -1874,6 +1874,12 @@ static bool findBiarchMultilibs(const Driver &D, .flag("-m64", /*Disallow=*/true) .flag("-mx32") .makeMultilib(); + Multilib Alt32sparc = MultilibBuilder() + .gccSuffix("/sparcv8plus") + .includeSuffix("/sparcv8plus") + .flag("-m32") + .flag("-m64", /*Disallow=*/true) + .makeMultilib(); // GCC toolchain for IAMCU doesn't have crtbegin.o, so look for libgcc.a. FilterNonExistent NonExistent( @@ -1885,10 +1891,14 @@ static bool findBiarchMultilibs(const Driver &D, const bool IsX32 = TargetTriple.isX32(); if (TargetTriple.isArch32Bit() && !NonExistent(Alt32)) Want = WANT64; + if (TargetTriple.isArch32Bit() && !NonExistent(Alt32sparc)) + Want = WANT64; else if (TargetTriple.isArch64Bit() && IsX32 && !NonExistent(Altx32)) Want = WANT64; else if (TargetTriple.isArch64Bit() && !IsX32 && !NonExistent(Alt64)) Want = WANT32; + else if (TargetTriple.isArch64Bit() && !NonExistent(Alt32sparc)) + Want = WANT64; else { if (TargetTriple.isArch32Bit()) Want = NeedsBiarchSuffix ? WANT64 : WANT32; @@ -1919,6 +1929,7 @@ static bool findBiarchMultilibs(const Driver &D, Result.Multilibs.push_back(Alt64); Result.Multilibs.push_back(Alt32); Result.Multilibs.push_back(Altx32); + Result.Multilibs.push_back(Alt32sparc); Result.Multilibs.FilterOut(NonExistent); @@ -1932,7 +1943,8 @@ static bool findBiarchMultilibs(const Driver &D, if (Result.SelectedMultilibs.back() == Alt64 || Result.SelectedMultilibs.back() == Alt32 || - Result.SelectedMultilibs.back() == Altx32) + Result.SelectedMultilibs.back() == Altx32 || + Result.SelectedMultilibs.back() == Alt32sparc) Result.BiarchSibling = Default; return true; @@ -2215,6 +2227,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // so we need to find those /usr/gcc/*/lib/gcc libdirs and go with // /usr/gcc/ as a prefix. + SmallVector, 8> SolarisPrefixes; std::string PrefixDir = concat(SysRoot, "/usr/gcc"); std::error_code EC; for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(PrefixDir, EC), @@ -2232,8 +2245,13 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( if (!D.getVFS().exists(CandidateLibPath)) continue; - Prefixes.push_back(CandidatePrefix); + SolarisPrefixes.emplace_back( + std::make_pair(CandidateVersion, CandidatePrefix)); } + // Sort in reverse order so GCCInstallationDetector::init picks the latest. 
+ std::sort(SolarisPrefixes.rbegin(), SolarisPrefixes.rend()); + for (auto p : SolarisPrefixes) + Prefixes.emplace_back(p.second); return; } diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 7acc600a6aa4..aed4ab1955b4 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -383,6 +383,10 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, if (HTC.ShouldLinkCXXStdlib(Args)) HTC.AddCXXStdlibLibArgs(Args, CmdArgs); } + const ToolChain::path_list &LibPaths = HTC.getFilePaths(); + for (const auto &LibPath : LibPaths) + CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath)); + Args.ClaimAllArgs(options::OPT_L); return; } @@ -441,6 +445,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, const ToolChain::path_list &LibPaths = HTC.getFilePaths(); for (const auto &LibPath : LibPaths) CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath)); + Args.ClaimAllArgs(options::OPT_L); //---------------------------------------------------------------------------- // diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index 335a5a88cdfa..de5a69e4ca3f 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -47,11 +47,24 @@ void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Exec, CmdArgs, Inputs, Output)); } +static bool getPIE(const ArgList &Args, const ToolChain &TC) { + if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) || + Args.hasArg(options::OPT_r)) + return false; + + Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie, + options::OPT_nopie); + if (!A) + return TC.isPIEDefault(Args); + return A->getOption().matches(options::OPT_pie); +} + void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { + const bool IsPIE = getPIE(Args, getToolChain()); ArgStringList CmdArgs; // Demangle C++ names in errors @@ -62,6 +75,11 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("_start"); } + if (IsPIE) { + CmdArgs.push_back("-z"); + CmdArgs.push_back("type=pie"); + } + if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); CmdArgs.push_back("-dn"); @@ -113,8 +131,13 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, values_xpg = "values-xpg4.o"; CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath(values_xpg))); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); + + const char *crtbegin = nullptr; + if (Args.hasArg(options::OPT_shared) || IsPIE) + crtbegin = "crtbeginS.o"; + else + crtbegin = "crtbegin.o"; + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(crtbegin))); // Add crtfastmath.o if available and fast math is enabled. getToolChain().addFastMathRuntimeIfAvailable(Args, CmdArgs); } @@ -151,24 +174,32 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-lgcc"); CmdArgs.push_back("-lm"); } + const SanitizerArgs &SA = getToolChain().getSanitizerArgs(Args); if (NeedsSanitizerDeps) { linkSanitizerRuntimeDeps(getToolChain(), CmdArgs); // Work around Solaris/amd64 ld bug when calling __tls_get_addr directly. // However, ld -z relax=transtls is available since Solaris 11.2, but not // in Illumos. 
- const SanitizerArgs &SA = getToolChain().getSanitizerArgs(Args); if (getToolChain().getTriple().getArch() == llvm::Triple::x86_64 && (SA.needsAsanRt() || SA.needsStatsRt() || (SA.needsUbsanRt() && !SA.requiresMinimalRuntime()))) CmdArgs.push_back("-zrelax=transtls"); } + // Avoid AsanInitInternal cycle, Issue #64126. + if (getToolChain().getTriple().isX86() && SA.needsSharedRt() && + SA.needsAsanRt()) + CmdArgs.push_back("-znow"); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, options::OPT_r)) { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); + if (Args.hasArg(options::OPT_shared) || IsPIE) + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); + else + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 32619bc56f7a..852437b9390f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -581,7 +581,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && - LBraceStack.back().PrevTok->is(tok::identifier)); + LBraceStack.back().PrevTok->isOneOf(tok::identifier, + tok::greater)); ProbablyBracedList = ProbablyBracedList || @@ -2464,7 +2465,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr; const bool Blacklisted = PrevPrev && - (PrevPrev->is(tok::kw___attribute) || + (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || (SeenEqual && (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h index e447590393ec..6166317f8f9d 100644 --- a/clang/lib/Headers/__clang_cuda_math.h +++ b/clang/lib/Headers/__clang_cuda_math.h @@ -36,7 +36,7 @@ // because the OpenMP overlay requires constexpr functions here but prior to // c++14 void return functions could not be constexpr. 
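The __clang_cuda_math.h hunk just below fixes a malformed preprocessor condition: #ifdef takes a single macro name, so everything after __OPENMP_NVPTX__ was never evaluated (and typically draws an "extra tokens" warning). A minimal illustration of the difference:

  #ifdef FOO && defined(BAR)        // tests only FOO; the rest is ignored
  #endif

  #if defined(FOO) && defined(BAR)  // evaluates the whole condition
  #endif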
#pragma push_macro("__DEVICE_VOID__") -#ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L +#if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE_VOID__ __DEVICE__ diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h index ed576027cb5e..f15198b3d9f9 100644 --- a/clang/lib/Headers/__clang_hip_libdevice_declares.h +++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -317,7 +317,7 @@ __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); -#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 560 || 1 +#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 560 #define __DEPRECATED_SINCE_HIP_560(X) __attribute__((deprecated(X))) #else #define __DEPRECATED_SINCE_HIP_560(X) diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index 454f74e92f85..1ad6853a97c9 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -328,14 +328,4 @@ static __inline int __get_cpuid_count (unsigned int __leaf, return 1; } -// If MS extensions are enabled, __cpuidex is defined as a builtin which will -// conflict with the __cpuidex definition below. -#ifndef _MSC_EXTENSIONS -static __inline void __cpuidex (int __cpu_info[4], int __leaf, int __subleaf) -{ - __cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2], - __cpu_info[3]); -} -#endif - #endif /* __CPUID_H */ diff --git a/clang/lib/Interpreter/IncrementalExecutor.cpp b/clang/lib/Interpreter/IncrementalExecutor.cpp index 3f8d60630de4..2c4dfc9a611e 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.cpp +++ b/clang/lib/Interpreter/IncrementalExecutor.cpp @@ -92,12 +92,19 @@ llvm::Error IncrementalExecutor::runCtors() const { llvm::Expected IncrementalExecutor::getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const { - auto Sym = (NameKind == LinkerName) ? Jit->lookupLinkerMangled(Name) - : Jit->lookup(Name); - - if (!Sym) - return Sym.takeError(); - return Sym; + using namespace llvm::orc; + auto SO = makeJITDylibSearchOrder({&Jit->getMainJITDylib(), + Jit->getPlatformJITDylib().get(), + Jit->getProcessSymbolsJITDylib().get()}); + + ExecutionSession &ES = Jit->getExecutionSession(); + + auto SymOrErr = + ES.lookup(SO, (NameKind == LinkerName) ? 
ES.intern(Name) + : Jit->mangleAndIntern(Name)); + if (auto Err = SymOrErr.takeError()) + return std::move(Err); + return SymOrErr->getAddress(); } } // end namespace clang diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 3b9913ac8ba4..a6f50832950c 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -57,6 +57,26 @@ static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) { } } +static unsigned getEncodingPrefixLen(tok::TokenKind kind) { + switch (kind) { + default: + llvm_unreachable("Unknown token type!"); + case tok::char_constant: + case tok::string_literal: + return 0; + case tok::utf8_char_constant: + case tok::utf8_string_literal: + return 2; + case tok::wide_char_constant: + case tok::wide_string_literal: + case tok::utf16_char_constant: + case tok::utf16_string_literal: + case tok::utf32_char_constant: + case tok::utf32_string_literal: + return 1; + } +} + static CharSourceRange MakeCharSourceRange(const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, @@ -343,7 +363,9 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, diag::err_unevaluated_string_invalid_escape_sequence) << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin); + HadError = true; } + return ResultChar; } @@ -1917,9 +1939,22 @@ void StringLiteralParser::init(ArrayRef StringToks){ // Remember if we see any wide or utf-8/16/32 strings. // Also check for illegal concatenations. if (isUnevaluated() && Tok.getKind() != tok::string_literal) { - if (Diags) - Diags->Report(Tok.getLocation(), diag::err_unevaluated_string_prefix); - hadError = true; + if (Diags) { + SourceLocation PrefixEndLoc = Lexer::AdvanceToTokenCharacter( + Tok.getLocation(), getEncodingPrefixLen(Tok.getKind()), SM, + Features); + CharSourceRange Range = + CharSourceRange::getCharRange({Tok.getLocation(), PrefixEndLoc}); + StringRef Prefix(SM.getCharacterData(Tok.getLocation()), + getEncodingPrefixLen(Tok.getKind())); + Diags->Report(Tok.getLocation(), + Features.CPlusPlus26 + ? 
diag::err_unevaluated_string_prefix + : diag::warn_unevaluated_string_prefix) + << Prefix << Features.CPlusPlus << FixItHint::CreateRemoval(Range); + } + if (Features.CPlusPlus26) + hadError = true; } else if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) { if (isOrdinary()) { Kind = Tok.getKind(); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index c1e09db2b3ee..d9ff6c42c502 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1016,10 +1016,23 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd) { return nullptr; } - if (isTokenStringLiteral()) - AssertMessage = ParseUnevaluatedStringLiteralExpression(); - else if (getLangOpts().CPlusPlus26) + bool ParseAsExpression = false; + if (getLangOpts().CPlusPlus26) { + for (unsigned I = 0;; ++I) { + const Token &T = GetLookAheadToken(I); + if (T.is(tok::r_paren)) + break; + if (!tok::isStringLiteral(Tok.getKind())) { + ParseAsExpression = true; + break; + } + } + } + + if (ParseAsExpression) AssertMessage = ParseConstantExpressionInExprEvalContext(); + else if (tok::isStringLiteral(Tok.getKind())) + AssertMessage = ParseUnevaluatedStringLiteralExpression(); else { Diag(Tok, diag::err_expected_string_literal) << /*Source='static_assert'*/ 1; diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index b7c83bbeb82e..664337052500 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -62,6 +62,7 @@ bool Parser::isCXXDeclarationStatement( case tok::kw_static_assert: case tok::kw__Static_assert: return true; + case tok::coloncolon: case tok::identifier: { if (DisambiguatingWithExpression) { RevertingTentativeParsingAction TPA(*this); diff --git a/clang/lib/Sema/SemaAvailability.cpp b/clang/lib/Sema/SemaAvailability.cpp index 05ad42780e50..84c06566387c 100644 --- a/clang/lib/Sema/SemaAvailability.cpp +++ b/clang/lib/Sema/SemaAvailability.cpp @@ -123,6 +123,18 @@ ShouldDiagnoseAvailabilityInContext(Sema &S, AvailabilityResult K, const NamedDecl *OffendingDecl) { assert(K != AR_Available && "Expected an unavailable declaration here!"); + // If this was defined using CF_OPTIONS, etc. then ignore the diagnostic. + auto DeclLoc = Ctx->getBeginLoc(); + // This is only a problem in Foundation's C++ implementation for CF_OPTIONS. + if (DeclLoc.isMacroID() && S.getLangOpts().CPlusPlus && + isa(OffendingDecl)) { + StringRef MacroName = S.getPreprocessor().getImmediateMacroName(DeclLoc); + if (MacroName == "CF_OPTIONS" || MacroName == "OBJC_OPTIONS" || + MacroName == "SWIFT_OPTIONS" || MacroName == "NS_OPTIONS") { + return false; + } + } + // Checks if we should emit the availability diagnostic in the context of C. auto CheckContext = [&](const Decl *C) { if (K == AR_NotYetIntroduced) { diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index d65ecf52c523..b338d601db73 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -935,6 +935,14 @@ void CastOperation::CheckDynamicCast() { << isClangCL; } + // For a dynamic_cast to a final type, IR generation might emit a reference + // to the vtable. + if (DestRecord) { + auto *DestDecl = DestRecord->getAsCXXRecordDecl(); + if (DestDecl->isEffectivelyFinal()) + Self.MarkVTableUsed(OpRange.getBegin(), DestDecl); + } + // Done. Everything else is run-time checks. 
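The SemaCast.cpp addition above marks the destination class's vtable as used when dynamic_cast targets a final class, since IR generation may then compare vtable pointers directly instead of calling the runtime. A minimal sketch of the pattern it covers:

  struct Base { virtual ~Base(); };
  struct Derived final : Base { };

  bool isDerived(Base *b) {
    // Because Derived is final, codegen can test b's vptr against Derived's
    // vtable, so Sema must make sure that vtable is actually marked used.
    return dynamic_cast<Derived *>(b) != nullptr;
  }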
Kind = CK_Dynamic; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index a4bf57928470..21b5781a71cd 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -9154,7 +9154,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, bool HasPrototype = (D.isFunctionDeclarator() && D.getFunctionTypeInfo().hasPrototype) || (D.getDeclSpec().isTypeRep() && - D.getDeclSpec().getRepAsType().get()->isFunctionProtoType()) || + SemaRef.GetTypeFromParser(D.getDeclSpec().getRepAsType(), nullptr) + ->isFunctionProtoType()) || (!R->getAsAdjusted() && R->isFunctionProtoType()); assert( (HasPrototype || !SemaRef.getLangOpts().requiresStrictPrototypes()) && diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2716b6677105..3a5e302cc03a 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -13880,56 +13880,6 @@ inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS, return InvalidOperands(Loc, LHS, RHS); } -// Diagnose cases where the user write a logical and/or but probably meant a -// bitwise one. We do this when one of the operands is a non-bool integer and -// the other is a constant. -void Sema::diagnoseLogicalInsteadOfBitwise(Expr *Op1, Expr *Op2, - SourceLocation Loc, - BinaryOperatorKind Opc) { - if (Op1->getType()->isIntegerType() && !Op1->getType()->isBooleanType() && - Op2->getType()->isIntegerType() && !Op2->isValueDependent() && - // Don't warn in macros or template instantiations. - !Loc.isMacroID() && !inTemplateInstantiation() && - !Op2->getExprLoc().isMacroID() && - !Op1->getExprLoc().isMacroID()) { - bool IsOp1InMacro = Op1->getExprLoc().isMacroID(); - bool IsOp2InMacro = Op2->getExprLoc().isMacroID(); - - // Exclude the specific expression from triggering the warning. - if (!(IsOp1InMacro && IsOp2InMacro && Op1->getSourceRange() == Op2->getSourceRange())) { - // If the RHS can be constant folded, and if it constant folds to something - // that isn't 0 or 1 (which indicate a potential logical operation that - // happened to fold to true/false) then warn. - // Parens on the RHS are ignored. - // If the RHS can be constant folded, and if it constant folds to something - // that isn't 0 or 1 (which indicate a potential logical operation that - // happened to fold to true/false) then warn. - // Parens on the RHS are ignored. - Expr::EvalResult EVResult; - if (Op2->EvaluateAsInt(EVResult, Context)) { - llvm::APSInt Result = EVResult.Val.getInt(); - if ((getLangOpts().Bool && !Op2->getType()->isBooleanType() && - !Op2->getExprLoc().isMacroID()) || - (Result != 0 && Result != 1)) { - Diag(Loc, diag::warn_logical_instead_of_bitwise) - << Op2->getSourceRange() << (Opc == BO_LAnd ? "&&" : "||"); - // Suggest replacing the logical operator with the bitwise version - Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator) - << (Opc == BO_LAnd ? "&" : "|") - << FixItHint::CreateReplacement( - SourceRange(Loc, getLocForEndOfToken(Loc)), - Opc == BO_LAnd ? 
"&" : "|"); - if (Opc == BO_LAnd) - // Suggest replacing "Foo() && kNonZero" with "Foo()" - Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant) - << FixItHint::CreateRemoval(SourceRange( - getLocForEndOfToken(Op1->getEndLoc()), Op2->getEndLoc())); - } - } - } - } -} - // C99 6.5.[13,14] inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, @@ -13948,6 +13898,9 @@ inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, } } + if (EnumConstantInBoolContext) + Diag(Loc, diag::warn_enum_constant_in_bool_context); + // WebAssembly tables can't be used with logical operators. QualType LHSTy = LHS.get()->getType(); QualType RHSTy = RHS.get()->getType(); @@ -13958,14 +13911,40 @@ inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, return InvalidOperands(Loc, LHS, RHS); } - if (EnumConstantInBoolContext) { - // Warn when converting the enum constant to a boolean - Diag(Loc, diag::warn_enum_constant_in_bool_context); - } else { - // Diagnose cases where the user write a logical and/or but probably meant a - // bitwise one. - diagnoseLogicalInsteadOfBitwise(LHS.get(), RHS.get(), Loc, Opc); - diagnoseLogicalInsteadOfBitwise(RHS.get(), LHS.get(), Loc, Opc); + // Diagnose cases where the user write a logical and/or but probably meant a + // bitwise one. We do this when the LHS is a non-bool integer and the RHS + // is a constant. + if (!EnumConstantInBoolContext && LHS.get()->getType()->isIntegerType() && + !LHS.get()->getType()->isBooleanType() && + RHS.get()->getType()->isIntegerType() && !RHS.get()->isValueDependent() && + // Don't warn in macros or template instantiations. + !Loc.isMacroID() && !inTemplateInstantiation()) { + // If the RHS can be constant folded, and if it constant folds to something + // that isn't 0 or 1 (which indicate a potential logical operation that + // happened to fold to true/false) then warn. + // Parens on the RHS are ignored. + Expr::EvalResult EVResult; + if (RHS.get()->EvaluateAsInt(EVResult, Context)) { + llvm::APSInt Result = EVResult.Val.getInt(); + if ((getLangOpts().Bool && !RHS.get()->getType()->isBooleanType() && + !RHS.get()->getExprLoc().isMacroID()) || + (Result != 0 && Result != 1)) { + Diag(Loc, diag::warn_logical_instead_of_bitwise) + << RHS.get()->getSourceRange() << (Opc == BO_LAnd ? "&&" : "||"); + // Suggest replacing the logical operator with the bitwise version + Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator) + << (Opc == BO_LAnd ? "&" : "|") + << FixItHint::CreateReplacement( + SourceRange(Loc, getLocForEndOfToken(Loc)), + Opc == BO_LAnd ? 
"&" : "|"); + if (Opc == BO_LAnd) + // Suggest replacing "Foo() && kNonZero" with "Foo()" + Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant) + << FixItHint::CreateRemoval( + SourceRange(getLocForEndOfToken(LHS.get()->getEndLoc()), + RHS.get()->getEndLoc())); + } + } } if (!Context.getLangOpts().CPlusPlus) { diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index c4f4edb6666c..d1ff688c2a21 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -513,42 +513,21 @@ void LookupResult::resolveKind() { const NamedDecl *HasNonFunction = nullptr; llvm::SmallVector EquivalentNonFunctions; - llvm::BitVector RemovedDecls(N); - for (unsigned I = 0; I < N; I++) { + unsigned UniqueTagIndex = 0; + + unsigned I = 0; + while (I < N) { const NamedDecl *D = Decls[I]->getUnderlyingDecl(); D = cast(D->getCanonicalDecl()); // Ignore an invalid declaration unless it's the only one left. // Also ignore HLSLBufferDecl which not have name conflict with other Decls. - if ((D->isInvalidDecl() || isa(D)) && - N - RemovedDecls.count() > 1) { - RemovedDecls.set(I); + if ((D->isInvalidDecl() || isa(D)) && !(I == 0 && N == 1)) { + Decls[I] = Decls[--N]; continue; } - // C++ [basic.scope.hiding]p2: - // A class name or enumeration name can be hidden by the name of - // an object, function, or enumerator declared in the same - // scope. If a class or enumeration name and an object, function, - // or enumerator are declared in the same scope (in any order) - // with the same name, the class or enumeration name is hidden - // wherever the object, function, or enumerator name is visible. - if (HideTags && isa(D)) { - bool Hidden = false; - for (auto *OtherDecl : Decls) { - if (canHideTag(OtherDecl) && - getContextForScopeMatching(OtherDecl)->Equals( - getContextForScopeMatching(Decls[I]))) { - RemovedDecls.set(I); - Hidden = true; - break; - } - } - if (Hidden) - continue; - } - std::optional ExistingI; // Redeclarations of types via typedef can occur both within a scope @@ -581,7 +560,7 @@ void LookupResult::resolveKind() { if (isPreferredLookupResult(getSema(), getLookupKind(), Decls[I], Decls[*ExistingI])) Decls[*ExistingI] = Decls[I]; - RemovedDecls.set(I); + Decls[I] = Decls[--N]; continue; } @@ -592,6 +571,7 @@ void LookupResult::resolveKind() { } else if (isa(D)) { if (HasTag) Ambiguous = true; + UniqueTagIndex = I; HasTag = true; } else if (isa(D)) { HasFunction = true; @@ -607,7 +587,7 @@ void LookupResult::resolveKind() { if (getSema().isEquivalentInternalLinkageDeclaration(HasNonFunction, D)) { EquivalentNonFunctions.push_back(D); - RemovedDecls.set(I); + Decls[I] = Decls[--N]; continue; } @@ -615,6 +595,28 @@ void LookupResult::resolveKind() { } HasNonFunction = D; } + I++; + } + + // C++ [basic.scope.hiding]p2: + // A class name or enumeration name can be hidden by the name of + // an object, function, or enumerator declared in the same + // scope. If a class or enumeration name and an object, function, + // or enumerator are declared in the same scope (in any order) + // with the same name, the class or enumeration name is hidden + // wherever the object, function, or enumerator name is visible. + // But it's still an error if there are distinct tag types found, + // even if they're not visible. (ref?) + if (N > 1 && HideTags && HasTag && !Ambiguous && + (HasFunction || HasNonFunction || HasUnresolved)) { + const NamedDecl *OtherDecl = Decls[UniqueTagIndex ? 
0 : N - 1]; + if (isa(Decls[UniqueTagIndex]->getUnderlyingDecl()) && + getContextForScopeMatching(Decls[UniqueTagIndex])->Equals( + getContextForScopeMatching(OtherDecl)) && + canHideTag(OtherDecl)) + Decls[UniqueTagIndex] = Decls[--N]; + else + Ambiguous = true; } // FIXME: This diagnostic should really be delayed until we're done with @@ -623,15 +625,9 @@ void LookupResult::resolveKind() { getSema().diagnoseEquivalentInternalLinkageDeclarations( getNameLoc(), HasNonFunction, EquivalentNonFunctions); - // Remove decls by replacing them with decls from the end (which - // means that we need to iterate from the end) and then truncating - // to the new size. - for (int I = RemovedDecls.find_last(); I >= 0; I = RemovedDecls.find_prev(I)) - Decls[I] = Decls[--N]; Decls.truncate(N); - if ((HasNonFunction && (HasFunction || HasUnresolved)) || - (HideTags && HasTag && (HasFunction || HasNonFunction || HasUnresolved))) + if (HasNonFunction && (HasFunction || HasUnresolved)) Ambiguous = true; if (Ambiguous) diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 10b3587885e3..097e81ea7d45 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -7478,6 +7478,10 @@ StmtResult TreeTransform::TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr) { Sema::CompoundScopeRAII CompoundScope(getSema()); + Sema::FPFeaturesStateRAII FPSave(getSema()); + if (S->hasStoredFPFeatures()) + getSema().resetFPOptions( + S->getStoredFPFeatures().applyOverrides(getSema().getLangOpts())); const Stmt *ExprResult = S->getStmtExprResult(); bool SubStmtInvalid = false; diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 10c92f8d2149..c8cbee14be4f 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -181,6 +181,13 @@ namespace clang { static void setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index, NamedDecl *D); + /// Commit to a primary definition of the class RD, which is known to be + /// a definition of the class. We might not have read the definition data + /// for it yet. If we haven't then allocate placeholder definition data + /// now too. + static CXXRecordDecl *getOrFakePrimaryClassDefinition(ASTReader &Reader, + CXXRecordDecl *RD); + /// Results from loading a RedeclarableDecl. class RedeclarableResult { Decl *MergeWith; @@ -598,7 +605,13 @@ void ASTDeclReader::VisitDecl(Decl *D) { auto *LexicalDC = readDeclAs(); if (!LexicalDC) LexicalDC = SemaDC; - DeclContext *MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC); + // If the context is a class, we might not have actually merged it yet, in + // the case where the definition comes from an update record. + DeclContext *MergedSemaDC; + if (auto *RD = dyn_cast(SemaDC)) + MergedSemaDC = getOrFakePrimaryClassDefinition(Reader, RD); + else + MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC); // Avoid calling setLexicalDeclContext() directly because it uses // Decl::getASTContext() internally which is unsafe during derialization. D->setDeclContextsImpl(MergedSemaDC ? MergedSemaDC : SemaDC, LexicalDC, @@ -3198,6 +3211,32 @@ uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset) { return LocalOffset + M.GlobalBitOffset; } +CXXRecordDecl * +ASTDeclReader::getOrFakePrimaryClassDefinition(ASTReader &Reader, + CXXRecordDecl *RD) { + // Try to dig out the definition. 
+ auto *DD = RD->DefinitionData; + if (!DD) + DD = RD->getCanonicalDecl()->DefinitionData; + + // If there's no definition yet, then DC's definition is added by an update + // record, but we've not yet loaded that update record. In this case, we + // commit to DC being the canonical definition now, and will fix this when + // we load the update record. + if (!DD) { + DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD); + RD->setCompleteDefinition(true); + RD->DefinitionData = DD; + RD->getCanonicalDecl()->DefinitionData = DD; + + // Track that we did this horrible thing so that we can fix it later. + Reader.PendingFakeDefinitionData.insert( + std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake)); + } + + return DD->Definition; +} + /// Find the context in which we should search for previous declarations when /// looking for declarations to merge. DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader, @@ -3205,29 +3244,8 @@ DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader, if (auto *ND = dyn_cast(DC)) return ND->getOriginalNamespace(); - if (auto *RD = dyn_cast(DC)) { - // Try to dig out the definition. - auto *DD = RD->DefinitionData; - if (!DD) - DD = RD->getCanonicalDecl()->DefinitionData; - - // If there's no definition yet, then DC's definition is added by an update - // record, but we've not yet loaded that update record. In this case, we - // commit to DC being the canonical definition now, and will fix this when - // we load the update record. - if (!DD) { - DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD); - RD->setCompleteDefinition(true); - RD->DefinitionData = DD; - RD->getCanonicalDecl()->DefinitionData = DD; - - // Track that we did this horrible thing so that we can fix it later. 
- Reader.PendingFakeDefinitionData.insert( - std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake)); - } - - return DD->Definition; - } + if (auto *RD = dyn_cast(DC)) + return getOrFakePrimaryClassDefinition(Reader, RD); if (auto *RD = dyn_cast(DC)) return RD->getDefinition(); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 59dbc36d24e8..8dd78152bd68 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -580,7 +580,7 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { } void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { - static_assert(DeclContext::NumFunctionDeclBits == 30, + static_assert(DeclContext::NumFunctionDeclBits == 31, "You need to update the serializer after you change the " "FunctionDeclBits"); @@ -1495,7 +1495,7 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) { } void ASTDeclWriter::VisitCXXConstructorDecl(CXXConstructorDecl *D) { - static_assert(DeclContext::NumCXXConstructorDeclBits == 21, + static_assert(DeclContext::NumCXXConstructorDeclBits == 20, "You need to update the serializer after you change the " "CXXConstructorDeclBits"); diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index b9b82564b330..5158e99b75e5 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -588,19 +588,34 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) { return REAL(strncpy)(to, from, size); } -INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) { - void *ctx; - ASAN_INTERCEPTOR_ENTER(ctx, strtol); - ENSURE_ASAN_INITED(); - if (!flags()->replace_str) { - return REAL(strtol)(nptr, endptr, base); - } +template +static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr, + char **endptr, int base) + -> decltype(real(nullptr, nullptr, 0)) { + if (!flags()->replace_str) + return real(nptr, endptr, base); char *real_endptr; - long result = REAL(strtol)(nptr, &real_endptr, base); + auto res = real(nptr, &real_endptr, base); StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return result; + return res; } +# define INTERCEPTOR_STRTO_BASE(ret_type, func) \ + INTERCEPTOR(ret_type, func, const char *nptr, char **endptr, int base) { \ + void *ctx; \ + ASAN_INTERCEPTOR_ENTER(ctx, func); \ + ENSURE_ASAN_INITED(); \ + return StrtolImpl(ctx, REAL(func), nptr, endptr, base); \ + } + +INTERCEPTOR_STRTO_BASE(long, strtol) +INTERCEPTOR_STRTO_BASE(long long, strtoll) + +# if SANITIZER_GLIBC +INTERCEPTOR_STRTO_BASE(long, __isoc23_strtol) +INTERCEPTOR_STRTO_BASE(long long, __isoc23_strtoll) +# endif + INTERCEPTOR(int, atoi, const char *nptr) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, atoi); @@ -639,20 +654,6 @@ INTERCEPTOR(long, atol, const char *nptr) { return result; } -#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL -INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) { - void *ctx; - ASAN_INTERCEPTOR_ENTER(ctx, strtoll); - ENSURE_ASAN_INITED(); - if (!flags()->replace_str) { - return REAL(strtoll)(nptr, endptr, base); - } - char *real_endptr; - long long result = REAL(strtoll)(nptr, &real_endptr, base); - StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return result; -} - INTERCEPTOR(long long, atoll, const char *nptr) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, atoll); @@ -666,7 +667,6 @@ INTERCEPTOR(long long, atoll, const char *nptr) { ASAN_READ_STRING(ctx, nptr, (real_endptr - nptr) 
+ 1); return result; } -#endif // ASAN_INTERCEPT_ATOLL_AND_STRTOLL #if ASAN_INTERCEPT___CXA_ATEXIT || ASAN_INTERCEPT_ATEXIT static void AtCxaAtexit(void *unused) { @@ -751,11 +751,13 @@ void InitializeAsanInterceptors() { ASAN_INTERCEPT_FUNC(atoi); ASAN_INTERCEPT_FUNC(atol); - ASAN_INTERCEPT_FUNC(strtol); -#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL ASAN_INTERCEPT_FUNC(atoll); + ASAN_INTERCEPT_FUNC(strtol); ASAN_INTERCEPT_FUNC(strtoll); -#endif +# if SANITIZER_GLIBC + ASAN_INTERCEPT_FUNC(__isoc23_strtol); + ASAN_INTERCEPT_FUNC(__isoc23_strtoll); +# endif // Intecept jump-related functions. ASAN_INTERCEPT_FUNC(longjmp); diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h index 268096fea5e7..d00d05587b36 100644 --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -42,12 +42,10 @@ void InitializePlatformInterceptors(); // Use macro to describe if specific function should be // intercepted on a given platform. #if !SANITIZER_WINDOWS -# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 1 # define ASAN_INTERCEPT__LONGJMP 1 # define ASAN_INTERCEPT_INDEX 1 # define ASAN_INTERCEPT_PTHREAD_CREATE 1 #else -# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0 # define ASAN_INTERCEPT__LONGJMP 0 # define ASAN_INTERCEPT_INDEX 0 # define ASAN_INTERCEPT_PTHREAD_CREATE 0 diff --git a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp index e3a90f18ed81..0fa636bec0d0 100644 --- a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp +++ b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp @@ -65,6 +65,7 @@ INTERCEPT_WRAP_W_W(_expand_dbg) INTERCEPT_LIBRARY_FUNCTION(atoi); INTERCEPT_LIBRARY_FUNCTION(atol); +INTERCEPT_LIBRARY_FUNCTION(atoll); INTERCEPT_LIBRARY_FUNCTION(frexp); INTERCEPT_LIBRARY_FUNCTION(longjmp); #if SANITIZER_INTERCEPT_MEMCHR @@ -91,6 +92,7 @@ INTERCEPT_LIBRARY_FUNCTION(strspn); INTERCEPT_LIBRARY_FUNCTION(strstr); INTERCEPT_LIBRARY_FUNCTION(strtok); INTERCEPT_LIBRARY_FUNCTION(strtol); +INTERCEPT_LIBRARY_FUNCTION(strtoll); INTERCEPT_LIBRARY_FUNCTION(wcslen); INTERCEPT_LIBRARY_FUNCTION(wcsnlen); diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index 54cbda059315..2ac99b25c243 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -113,7 +113,7 @@ void __clear_cache(void *start, void *end) { #elif defined(__linux__) || defined(__OpenBSD__) // Pre-R6 may not be globalized. And some implementations may give strange // synci_step. So, let's use libc call for it. - cacheflush(start, end_int - start_int, BCACHE); + _flush_cache(start, end_int - start_int, BCACHE); #else (void)start_int; (void)end_int; diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c index 36eb696c39ee..0bd7296fb252 100644 --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model.c @@ -751,8 +751,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VP2INTERSECT); + // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't + // return all 0s for invalid subleaves so check the limit. 
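// Illustrative sketch, not part of the upstream change: CPUID leaf 7 reports
// its highest supported subleaf in EAX of subleaf 0, which is what the comment
// above relies on. Assuming only the getX86CpuIDAndInfoEx() helper that
// cpu_model.c already uses (a nonzero return is treated as failure, as in the
// surrounding code), the guard can be pictured as:
static int hasLeaf7Subleaf1(unsigned MaxLeaf) {
  unsigned EAX, EBX, ECX, EDX;
  if (MaxLeaf < 0x7)
    return 0;  // leaf 7 itself is not available
  if (getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX))
    return 0;
  if (EAX < 1)
    return 0;  // EAX of subleaf 0 is the maximum valid subleaf; 1 is not enumerated
  return !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
}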
bool HasLeaf7Subleaf1 = - MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + HasLeaf7 && EAX >= 1 && + !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512BF16); diff --git a/compiler-rt/lib/interception/interception.h b/compiler-rt/lib/interception/interception.h index 078d33b61be3..069f73d276f3 100644 --- a/compiler-rt/lib/interception/interception.h +++ b/compiler-rt/lib/interception/interception.h @@ -181,7 +181,7 @@ const interpose_substitution substitution_##func_name[] \ // FreeBSD's dynamic linker (incompliantly) gives non-weak symbols higher // priority than weak ones so weak aliases won't work for indirect calls // in position-independent (-fPIC / -fPIE) mode. -# define __ASM_WEAK_WRAPPER(func) +# define __ASM_WEAK_WRAPPER(func) ".globl " #func "\n" # else # define __ASM_WEAK_WRAPPER(func) ".weak " #func "\n" # endif // SANITIZER_FREEBSD || SANITIZER_NETBSD diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index f5e0d3cb9a67..ba92bd14d319 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -464,6 +464,25 @@ INTERCEPTORS_STRTO_BASE(long long, wcstoll, wchar_t) INTERCEPTORS_STRTO_BASE(unsigned long, wcstoul, wchar_t) INTERCEPTORS_STRTO_BASE(unsigned long long, wcstoull, wchar_t) +#if SANITIZER_GLIBC +INTERCEPTORS_STRTO(double, __isoc23_strtod, char) +INTERCEPTORS_STRTO(float, __isoc23_strtof, char) +INTERCEPTORS_STRTO(long double, __isoc23_strtold, char) +INTERCEPTORS_STRTO_BASE(long, __isoc23_strtol, char) +INTERCEPTORS_STRTO_BASE(long long, __isoc23_strtoll, char) +INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_strtoul, char) +INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_strtoull, char) +INTERCEPTORS_STRTO_BASE(u64, __isoc23_strtouq, char) + +INTERCEPTORS_STRTO(double, __isoc23_wcstod, wchar_t) +INTERCEPTORS_STRTO(float, __isoc23_wcstof, wchar_t) +INTERCEPTORS_STRTO(long double, __isoc23_wcstold, wchar_t) +INTERCEPTORS_STRTO_BASE(long, __isoc23_wcstol, wchar_t) +INTERCEPTORS_STRTO_BASE(long long, __isoc23_wcstoll, wchar_t) +INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_wcstoul, wchar_t) +INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_wcstoull, wchar_t) +#endif + #if SANITIZER_NETBSD #define INTERCEPT_STRTO(func) \ INTERCEPT_FUNCTION(func); \ @@ -1748,6 +1767,24 @@ void InitializeInterceptors() { INTERCEPT_STRTO(wcstoul); INTERCEPT_STRTO(wcstoll); INTERCEPT_STRTO(wcstoull); +#if SANITIZER_GLIBC + INTERCEPT_STRTO(__isoc23_strtod); + INTERCEPT_STRTO(__isoc23_strtof); + INTERCEPT_STRTO(__isoc23_strtold); + INTERCEPT_STRTO(__isoc23_strtol); + INTERCEPT_STRTO(__isoc23_strtoul); + INTERCEPT_STRTO(__isoc23_strtoll); + INTERCEPT_STRTO(__isoc23_strtoull); + INTERCEPT_STRTO(__isoc23_strtouq); + INTERCEPT_STRTO(__isoc23_wcstod); + INTERCEPT_STRTO(__isoc23_wcstof); + INTERCEPT_STRTO(__isoc23_wcstold); + INTERCEPT_STRTO(__isoc23_wcstol); + INTERCEPT_STRTO(__isoc23_wcstoul); + INTERCEPT_STRTO(__isoc23_wcstoll); + INTERCEPT_STRTO(__isoc23_wcstoull); +#endif + #ifdef SANITIZER_NLDBL_VERSION INTERCEPT_FUNCTION_VER(vswprintf, SANITIZER_NLDBL_VERSION); INTERCEPT_FUNCTION_VER(swprintf, SANITIZER_NLDBL_VERSION); diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 54e3030d5899..2bd6a49ce065 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -424,10 
+424,13 @@ static void createProfileDir(const char *Filename) { * its instrumented shared libraries dump profile data into their own data file. */ static FILE *openFileForMerging(const char *ProfileFileName, int *MergeDone) { - FILE *ProfileFile = getProfileFile(); + FILE *ProfileFile = NULL; int rc; - if (!ProfileFile) { + ProfileFile = getProfileFile(); + if (ProfileFile) { + lprofLockFileHandle(ProfileFile); + } else { createProfileDir(ProfileFileName); ProfileFile = lprofOpenFileEx(ProfileFileName); } @@ -478,6 +481,9 @@ static int writeFile(const char *OutputName) { if (OutputFile == getProfileFile()) { fflush(OutputFile); + if (doMerging()) { + lprofUnlockFileHandle(OutputFile); + } } else { fclose(OutputFile); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 299561b3ad3a..0e563fa12022 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -1491,6 +1491,16 @@ VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap) INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap) VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap) + +INTERCEPTOR(int, __isoc23_vscanf, const char *format, va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vscanf, false, format, ap) + +INTERCEPTOR(int, __isoc23_vsscanf, const char *str, const char *format, + va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vsscanf, false, str, format, ap) + +INTERCEPTOR(int, __isoc23_vfscanf, void *stream, const char *format, va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vfscanf, false, stream, format, ap) #endif // SANITIZER_INTERCEPT_ISOC99_SCANF INTERCEPTOR(int, scanf, const char *format, ...) @@ -1511,6 +1521,15 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format) INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...) FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format) + +INTERCEPTOR(int, __isoc23_scanf, const char *format, ...) +FORMAT_INTERCEPTOR_IMPL(__isoc23_scanf, __isoc23_vscanf, format) + +INTERCEPTOR(int, __isoc23_fscanf, void *stream, const char *format, ...) +FORMAT_INTERCEPTOR_IMPL(__isoc23_fscanf, __isoc23_vfscanf, stream, format) + +INTERCEPTOR(int, __isoc23_sscanf, const char *str, const char *format, ...) 
+FORMAT_INTERCEPTOR_IMPL(__isoc23_sscanf, __isoc23_vsscanf, str, format) #endif #endif @@ -1534,7 +1553,13 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format) COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf); \ COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf); \ COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \ - COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); + COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_scanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_sscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_fscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vsscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vfscanf); #else #define INIT_ISOC99_SCANF #endif @@ -3539,30 +3564,26 @@ UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr, (real_endptr - nptr) + 1 : 0); } - #if SANITIZER_INTERCEPT_STRTOIMAX -INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base); - // FIXME: under ASan the call below may write to freed memory and corrupt - // its metadata. See - // https://github.com/google/sanitizers/issues/321. +template +static ALWAYS_INLINE auto StrtoimaxImpl(void *ctx, Fn real, const char *nptr, + char **endptr, int base) + -> decltype(real(nullptr, nullptr, 0)) { char *real_endptr; - INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base); + auto res = real(nptr, &real_endptr, base); StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); return res; } +INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(strtoimax), nptr, endptr, base); +} INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base); - // FIXME: under ASan the call below may write to freed memory and corrupt - // its metadata. See - // https://github.com/google/sanitizers/issues/321. 
- char *real_endptr; - UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base); - StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return res; + return StrtoimaxImpl(ctx, REAL(strtoumax), nptr, endptr, base); } #define INIT_STRTOIMAX \ @@ -3572,6 +3593,25 @@ INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { #define INIT_STRTOIMAX #endif +#if SANITIZER_INTERCEPT_STRTOIMAX && SANITIZER_GLIBC +INTERCEPTOR(INTMAX_T, __isoc23_strtoimax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoimax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(__isoc23_strtoimax), nptr, endptr, base); +} +INTERCEPTOR(UINTMAX_T, __isoc23_strtoumax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoumax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(__isoc23_strtoumax), nptr, endptr, base); +} + +# define INIT_STRTOIMAX_C23 \ + COMMON_INTERCEPT_FUNCTION(__isoc23_strtoimax); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_strtoumax); +#else +# define INIT_STRTOIMAX_C23 +#endif + #if SANITIZER_INTERCEPT_MBSTOWCS INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) { void *ctx; @@ -10304,6 +10344,7 @@ static void InitializeCommonInterceptors() { INIT_GETCWD; INIT_GET_CURRENT_DIR_NAME; INIT_STRTOIMAX; + INIT_STRTOIMAX_C23; INIT_MBSTOWCS; INIT_MBSNRTOWCS; INIT_WCSTOMBS; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp index 1e635a66978f..a2000798a390 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp @@ -30,13 +30,7 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top, // TODO(yln): add arg sanity check for stack_top/stack_bottom CHECK_GE(max_depth, 2); const uptr kPageSize = GetPageSizeCached(); -#if defined(__GNUC__) - // __builtin_return_address returns the address of the call instruction - // on the SPARC and not the return address, so we need to compensate. - trace_buffer[0] = GetNextInstructionPc(pc); -#else trace_buffer[0] = pc; -#endif size = 1; if (stack_top < 4096) return; // Sanity check for stack top. // Flush register windows to memory diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp index 72f025a7d307..6a8e82e2e213 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp @@ -139,13 +139,7 @@ void BufferedStackTrace::UnwindSlow(uptr pc, u32 max_depth) { if (to_pop == 0 && size > 1) to_pop = 1; PopStackFrames(to_pop); -#if defined(__GNUC__) && defined(__sparc__) - // __builtin_return_address returns the address of the call instruction - // on the SPARC and not the return address, so we need to compensate. 
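// Illustrative sketch, not from the upstream change: the comment above concerns
// SPARC, where the saved return address points at the call instruction itself
// rather than at the instruction where execution resumes. Since the call and
// its delay slot are each 4 bytes, the compensation that GetNextInstructionPc()
// performs on this target boils down to the following (helper name invented
// here for illustration):
static inline uptr SparcResumePcFromReturnAddress(uptr return_pc) {
  return return_pc + 8;  // skip the call instruction and its delay slot
}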
- trace_buffer[0] = GetNextInstructionPc(pc); -#else trace_buffer[0] = pc; -#endif } void BufferedStackTrace::UnwindSlow(uptr pc, void *context, u32 max_depth) { diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt index 509e3f19fe38..819cfca44b00 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt @@ -34,6 +34,13 @@ __interceptor_pthread_setspecific w __interceptor_read w __interceptor_realpath w __isinf U +__isoc23_sscanf U +__isoc23_strtol U +__isoc23_strtoll U +__isoc23_strtoll_l U +__isoc23_strtoull U +__isoc23_strtoull_l U +__isoc23_vsscanf U __isoc99_sscanf U __isoc99_vsscanf U __moddi3 U diff --git a/libcxx/include/__algorithm/pstl_sort.h b/libcxx/include/__algorithm/pstl_sort.h index 81514953f24b..75c77ed40527 100644 --- a/libcxx/include/__algorithm/pstl_sort.h +++ b/libcxx/include/__algorithm/pstl_sort.h @@ -17,6 +17,7 @@ #include <__type_traits/is_execution_policy.h> #include <__type_traits/remove_cvref.h> #include <__utility/forward.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h index 27ec0a295f4f..bb62c1ce10c1 100644 --- a/libcxx/include/__format/format_functions.h +++ b/libcxx/include/__format/format_functions.h @@ -245,6 +245,9 @@ __handle_replacement_field(_Iterator __begin, _Iterator __end, using _CharT = iter_value_t<_Iterator>; __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __parse_ctx); + if (__r.__last == __end) + std::__throw_format_error("The argument index should end with a ':' or a '}'"); + bool __parse = *__r.__last == _CharT(':'); switch (*__r.__last) { case _CharT(':'): diff --git a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h index 0e2e91af7d19..5946ed698e0f 100644 --- a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h @@ -10,6 +10,7 @@ #define _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H #include <__config> +#include <__locale> // for locale_t #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__mdspan/layout_left.h b/libcxx/include/__mdspan/layout_left.h index f890c5ae0256..7503dcf77d13 100644 --- a/libcxx/include/__mdspan/layout_left.h +++ b/libcxx/include/__mdspan/layout_left.h @@ -164,7 +164,7 @@ class layout_left::mapping { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __r < extents_type::rank(), "layout_left::mapping::stride(): invalid rank index"); index_type __s = 1; - for (rank_type __i = extents_type::rank() - 1; __i > __r; __i--) + for (rank_type __i = 0; __i < __r; __i++) __s *= __extents_.extent(__i); return __s; } diff --git a/libcxx/include/__std_clang_module b/libcxx/include/__std_clang_module new file mode 100644 index 000000000000..4d02336d30b0 --- /dev/null +++ b/libcxx/include/__std_clang_module @@ -0,0 +1,226 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utils/generate_std_clang_module_header.py +// DO NOT MODIFY! + +// This header should not be directly included, it's exclusively to import all +// of the libc++ public clang modules for the `std` clang module to export. In +// other words, it's to facilitate `@import std;` in Objective-C++ and `import std` +// in Swift to expose all of the libc++ interfaces. This is generally not +// recommended, however there are some clients that need to import all of libc++ +// without knowing what "all" is. +#if !__building_module(std) +# error "Do not include this header directly, include individual headers instead" +#endif + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#include +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) +# include +#endif +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# include +#endif diff --git a/libcxx/include/__type_traits/is_nothrow_constructible.h b/libcxx/include/__type_traits/is_nothrow_constructible.h index d4686d89fd96..4949062433b7 100644 --- a/libcxx/include/__type_traits/is_nothrow_constructible.h +++ b/libcxx/include/__type_traits/is_nothrow_constructible.h @@ -22,7 +22,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_builtin(__is_nothrow_constructible) +// GCC is disabled due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106611 +#if __has_builtin(__is_nothrow_constructible) && !defined(_LIBCPP_COMPILER_GCC) template < class _Tp, class... _Args> struct _LIBCPP_TEMPLATE_VIS is_nothrow_constructible diff --git a/libcxx/include/__type_traits/remove_cv.h b/libcxx/include/__type_traits/remove_cv.h index 8fe8fb0e4959..c4bf612794bd 100644 --- a/libcxx/include/__type_traits/remove_cv.h +++ b/libcxx/include/__type_traits/remove_cv.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_builtin(__remove_cv) +#if __has_builtin(__remove_cv) && !defined(_LIBCPP_COMPILER_GCC) template struct remove_cv { using type _LIBCPP_NODEBUG = __remove_cv(_Tp); diff --git a/libcxx/include/__type_traits/remove_cvref.h b/libcxx/include/__type_traits/remove_cvref.h index 4dc950ac31ad..e8e8745ab096 100644 --- a/libcxx/include/__type_traits/remove_cvref.h +++ b/libcxx/include/__type_traits/remove_cvref.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_builtin(__remove_cvref) +#if __has_builtin(__remove_cvref) && !defined(_LIBCPP_COMPILER_GCC) template using __remove_cvref_t _LIBCPP_NODEBUG = __remove_cvref(_Tp); #else diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 0b418d2b7897..37a9edcd7ece 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -16,7 +16,6 @@ module std_atomic [system] { export * } module std_barrier [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "barrier" export * } @@ -37,7 +36,6 @@ module std_chrono [system] { export * } module std_codecvt [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "codecvt" export * } @@ -78,7 +76,6 @@ module std_expected [system] { export * } module std_filesystem [system] { - @requires_LIBCXX_ENABLE_FILESYSTEM@ header "filesystem" export * } @@ -91,8 +88,6 @@ module std_forward_list [system] { export * } module std_fstream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ - @requires_LIBCXX_ENABLE_FILESYSTEM@ header "fstream" export * } @@ -101,7 +96,6 @@ module std_functional [system] { export * } module std_future [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "future" export * } @@ -110,12 +104,10 @@ module std_initializer_list [system] { export * } module std_iomanip [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "iomanip" export * } module std_ios [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "ios" export * } @@ -124,12 +116,10 @@ module std_iosfwd [system] { export * } module std_iostream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "iostream" export * } module std_istream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "istream" export * } @@ -138,7 +128,6 @@ module std_iterator [system] { export * } module std_latch [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "latch" export * } @@ -151,7 +140,6 @@ module std_list [system] { export * } module 
std_locale [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "locale" export * } @@ -192,7 +180,6 @@ module std_optional [system] { export * } module std_ostream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "ostream" export * } @@ -217,7 +204,6 @@ module std_ratio [system] { export * } module std_regex [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "regex" export * } @@ -226,7 +212,6 @@ module std_scoped_allocator [system] { export * } module std_semaphore [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "semaphore" export * } @@ -235,7 +220,6 @@ module std_set [system] { export * } module std_shared_mutex [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "shared_mutex" export std_version } @@ -250,7 +234,6 @@ module std_span [system] { export std_private_span_span_fwd } module std_sstream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "sstream" export * } @@ -263,12 +246,10 @@ module std_stdexcept [system] { export * } module std_stop_token { - @requires_LIBCXX_ENABLE_THREADS@ header "stop_token" export * } module std_streambuf [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "streambuf" export * } @@ -281,7 +262,6 @@ module std_string_view [system] { export * } module std_strstream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "strstream" export * } @@ -290,7 +270,6 @@ module std_system_error [system] { export * } module std_thread [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "thread" export * } @@ -377,7 +356,6 @@ module std_climits [system] { export * } module std_clocale [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "clocale" export * } @@ -435,12 +413,10 @@ module std_cuchar [system] { export * } module std_cwchar [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ header "cwchar" export * } module std_cwctype [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ header "cwctype" export * } @@ -477,7 +453,6 @@ module std_limits_h [system] { export * } module std_locale_h [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "locale.h" export * } @@ -493,8 +468,6 @@ module std_setjmp_h [system] { // FIXME: is missing. // provided by compiler. module std_stdatomic_h [system] { - @requires_LIBCXX_ENABLE_THREADS@ - requires cplusplus23 header "stdatomic.h" export * } @@ -536,21 +509,17 @@ module std_uchar_h [system] { } // provided by C library. module std_wchar_h [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ // 's __need_* macros require textual inclusion. textual header "wchar.h" export * } module std_wctype_h [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ header "wctype.h" export * } // Experimental C++ standard library interfaces module std_experimental [system] { - requires cplusplus11 - module deque { header "experimental/deque" export * @@ -627,6 +596,13 @@ module std_experimental [system] { } } +// Convenience method to get all of the above modules in a single import statement. +// Importing only the needed modules is likely to be more performant. +module std [system] { + header "__std_clang_module" + export * +} + // Implementation detail headers that are private to libc++. These modules // must not be directly imported. 
module std_private_assert [system] { @@ -650,7 +626,6 @@ module std_private_hash_table [system] { export * } module std_private_locale [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__locale" export * } @@ -819,6 +794,7 @@ module std_private_algorithm_pstl_generate [system module std_private_algorithm_pstl_is_partitioned [system] { header "__algorithm/pstl_is_partitioned.h" } module std_private_algorithm_pstl_merge [system] { header "__algorithm/pstl_merge.h" } module std_private_algorithm_pstl_replace [system] { header "__algorithm/pstl_replace.h" } +module std_private_algorithm_pstl_sort [system] { header "__algorithm/pstl_sort.h" } module std_private_algorithm_pstl_stable_sort [system] { header "__algorithm/pstl_stable_sort.h" export std_private_functional_operations @@ -1188,7 +1164,6 @@ module std_private_chrono_duration [system] { } module std_private_chrono_file_clock [system] { header "__chrono/file_clock.h" } module std_private_chrono_formatter [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__chrono/formatter.h" } module std_private_chrono_hh_mm_ss [system] { header "__chrono/hh_mm_ss.h" } @@ -1202,11 +1177,9 @@ module std_private_chrono_month [system] { header "__chrono/mon module std_private_chrono_month_weekday [system] { header "__chrono/month_weekday.h" } module std_private_chrono_monthday [system] { header "__chrono/monthday.h" } module std_private_chrono_ostream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__chrono/ostream.h" } module std_private_chrono_parser_std_format_spec [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__chrono/parser_std_format_spec.h" } module std_private_chrono_statically_widen [system] { header "__chrono/statically_widen.h" } @@ -1530,7 +1503,10 @@ module std_private_memory_allocator_destructor [system] { header "__m module std_private_memory_allocator_traits [system] { header "__memory/allocator_traits.h" } module std_private_memory_assume_aligned [system] { header "__memory/assume_aligned.h" } module std_private_memory_auto_ptr [system] { header "__memory/auto_ptr.h" } -module std_private_memory_builtin_new_allocator [system] { header "__memory/builtin_new_allocator.h" } +module std_private_memory_builtin_new_allocator [system] { + header "__memory/builtin_new_allocator.h" + export * +} module std_private_memory_compressed_pair [system] { header "__memory/compressed_pair.h" } module std_private_memory_concepts [system] { header "__memory/concepts.h" @@ -1637,9 +1613,15 @@ module std_private_random_piecewise_linear_distribution [system] { export * } module std_private_random_poisson_distribution [system] { header "__random/poisson_distribution.h" } -module std_private_random_random_device [system] { header "__random/random_device.h" } +module std_private_random_random_device [system] { + header "__random/random_device.h" + export * +} module std_private_random_ranlux [system] { header "__random/ranlux.h" } -module std_private_random_seed_seq [system] { header "__random/seed_seq.h" } +module std_private_random_seed_seq [system] { + header "__random/seed_seq.h" + export * +} module std_private_random_shuffle_order_engine [system] { header "__random/shuffle_order_engine.h" } module std_private_random_student_t_distribution [system] { header "__random/student_t_distribution.h" } module std_private_random_subtract_with_carry_engine [system] { header "__random/subtract_with_carry_engine.h" } @@ -1682,7 +1664,6 @@ module std_private_ranges_filter_view [system] { module std_private_ranges_from_range [system] 
{ header "__ranges/from_range.h" } module std_private_ranges_iota_view [system] { header "__ranges/iota_view.h" } module std_private_ranges_istream_view [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__ranges/istream_view.h" } module std_private_ranges_join_view [system] { @@ -1749,7 +1730,10 @@ module std_private_stop_token_stop_token [system] { export * } -module std_private_string_char_traits [system] { header "__string/char_traits.h" } +module std_private_string_char_traits [system] { + header "__string/char_traits.h" + export * +} module std_private_string_constexpr_c_functions [system] { header "__string/constexpr_c_functions.h" export std_private_type_traits_is_equality_comparable diff --git a/libcxx/include/sstream b/libcxx/include/sstream index d7ad0213eb34..40930df24c6d 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -399,8 +399,12 @@ public: _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() const & { return str(__str_.get_allocator()); } _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() && { + string_type __result; const basic_string_view<_CharT, _Traits> __view = view(); - string_type __result(std::move(__str_), __view.data() - __str_.data(), __view.size()); + if (!__view.empty()) { + auto __pos = __view.data() - __str_.data(); + __result.assign(std::move(__str_), __pos, __view.size()); + } __str_.clear(); __init_buf_ptrs(); return __result; @@ -415,7 +419,7 @@ public: } _LIBCPP_HIDE_FROM_ABI basic_string_view view() const noexcept; -#endif +#endif // _LIBCPP_STD_VER >= 20 void str(const string_type& __s) { __str_ = __s; @@ -900,20 +904,22 @@ public: return const_cast*>(&__sb_); } -#if _LIBCPP_STD_VER >= 20 - _LIBCPP_HIDE_FROM_ABI string_type str() const & { return __sb_.str(); } +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_BUILDING_LIBRARY) + _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } +#else + _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() const & { return __sb_.str(); } + _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() && { return std::move(__sb_).str(); } +#endif + +#if _LIBCPP_STD_VER >= 20 template requires __is_allocator<_SAlloc>::value _LIBCPP_HIDE_FROM_ABI basic_string str(const _SAlloc& __sa) const { return __sb_.str(__sa); } - _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); } - _LIBCPP_HIDE_FROM_ABI basic_string_view view() const noexcept { return __sb_.view(); } -#else // _LIBCPP_STD_VER >= 20 - _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI void str(const string_type& __s) { __sb_.str(__s); } @@ -1023,20 +1029,22 @@ public: return const_cast*>(&__sb_); } -#if _LIBCPP_STD_VER >= 20 - _LIBCPP_HIDE_FROM_ABI string_type str() const & { return __sb_.str(); } +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_BUILDING_LIBRARY) + _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } +#else + _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() const & { return __sb_.str(); } + + _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() && { return std::move(__sb_).str(); } +#endif +#if _LIBCPP_STD_VER >= 20 template requires __is_allocator<_SAlloc>::value _LIBCPP_HIDE_FROM_ABI basic_string str(const _SAlloc& __sa) const { return __sb_.str(__sa); } - _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); } - _LIBCPP_HIDE_FROM_ABI basic_string_view view() const noexcept { return __sb_.view(); } -#else // _LIBCPP_STD_VER >= 20 - _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } 
#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI void str(const string_type& __s) { __sb_.str(__s); } @@ -1145,20 +1153,22 @@ public: return const_cast*>(&__sb_); } -#if _LIBCPP_STD_VER >= 20 - _LIBCPP_HIDE_FROM_ABI string_type str() const & { return __sb_.str(); } +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_BUILDING_LIBRARY) + _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } +#else + _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() const & { return __sb_.str(); } + + _LIBCPP_HIDE_FROM_ABI_SSTREAM string_type str() && { return std::move(__sb_).str(); } +#endif +#if _LIBCPP_STD_VER >= 20 template requires __is_allocator<_SAlloc>::value _LIBCPP_HIDE_FROM_ABI basic_string str(const _SAlloc& __sa) const { return __sb_.str(__sa); } - _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); } - _LIBCPP_HIDE_FROM_ABI basic_string_view view() const noexcept { return __sb_.view(); } -#else // _LIBCPP_STD_VER >= 20 - _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); } #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI void str(const string_type& __s) { __sb_.str(__s); } diff --git a/libcxx/modules/std/atomic.cppm b/libcxx/modules/std/atomic.cppm index 9c1948494bd4..faf902b768d7 100644 --- a/libcxx/modules/std/atomic.cppm +++ b/libcxx/modules/std/atomic.cppm @@ -23,9 +23,6 @@ export namespace std { using std::memory_order_seq_cst; using std::kill_dependency; -} // namespace std - -namespace std { // [atomics.ref.generic], class template atomic_ref // [atomics.ref.pointer], partial specialization for pointers diff --git a/libcxx/modules/std/execution.cppm b/libcxx/modules/std/execution.cppm index 6ea12c0dc4ee..e0996f33d415 100644 --- a/libcxx/modules/std/execution.cppm +++ b/libcxx/modules/std/execution.cppm @@ -17,7 +17,7 @@ export namespace std { using std::is_execution_policy_v; } // namespace std -namespace std::execution { +export namespace std::execution { // [execpol.seq], sequenced execution policy using std::execution::sequenced_policy; diff --git a/libcxx/modules/std/filesystem.cppm b/libcxx/modules/std/filesystem.cppm index ea8136b4ef9f..c6dac368a1cd 100644 --- a/libcxx/modules/std/filesystem.cppm +++ b/libcxx/modules/std/filesystem.cppm @@ -110,11 +110,11 @@ export namespace std::filesystem { } // namespace std::filesystem // [fs.path.hash], hash support -namespace std { +export namespace std { using std::hash; } -namespace std::ranges { +export namespace std::ranges { using std::ranges::enable_borrowed_range; using std::ranges::enable_view; } // namespace std::ranges diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp index 0990d8dc181c..f1596132024c 100644 --- a/libcxx/src/chrono.cpp +++ b/libcxx/src/chrono.cpp @@ -31,7 +31,7 @@ # include // for gettimeofday and timeval #endif -#if defined(__APPLE__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) +#if defined(__APPLE__) || defined (__gnu_hurd__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) # define _LIBCPP_HAS_CLOCK_GETTIME #endif diff --git a/libcxx/src/filesystem/filesystem_clock.cpp b/libcxx/src/filesystem/filesystem_clock.cpp index d00cdc6df343..fbb19ac68df5 100644 --- a/libcxx/src/filesystem/filesystem_clock.cpp +++ b/libcxx/src/filesystem/filesystem_clock.cpp @@ -29,7 +29,7 @@ # include // for gettimeofday and timeval #endif -#if defined(__APPLE__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) +#if defined(__APPLE__) || defined (__gnu_hurd__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) # define _LIBCPP_HAS_CLOCK_GETTIME #endif diff --git 
a/libunwind/src/Unwind-EHABI.cpp b/libunwind/src/Unwind-EHABI.cpp index f387c5d3db4e..05475c6ac1e2 100644 --- a/libunwind/src/Unwind-EHABI.cpp +++ b/libunwind/src/Unwind-EHABI.cpp @@ -885,8 +885,11 @@ _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { return result; } -static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation, - void* valuep) { +// Only used in _LIBUNWIND_TRACE_API, which is a no-op when assertions are +// disabled. +[[gnu::unused]] static uint64_t +ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation, + const void *valuep) { uint64_t value = 0; switch (representation) { case _UVRSD_UINT32: diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 9dc99e573d41..04ddb4682917 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -457,6 +457,7 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, return R_RISCV_ADD; case R_LARCH_32_PCREL: case R_LARCH_64_PCREL: + case R_LARCH_PCREL20_S2: return R_PC; case R_LARCH_B16: case R_LARCH_B21: @@ -564,6 +565,12 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, write64le(loc, val); return; + case R_LARCH_PCREL20_S2: + checkInt(loc, val, 22, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setJ20(read32le(loc), val >> 2)); + return; + case R_LARCH_B16: checkInt(loc, val, 18, rel); checkAlignment(loc, val, 4, rel); diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 87942c1e9245..3d21edb3453a 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -471,10 +471,14 @@ void PPC::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, if (insn >> 26 != 31) error("unrecognized instruction for IE to LE R_PPC_TLS"); // addi rT, rT, x@tls --> addi rT, rT, x@tprel@l - uint32_t dFormOp = getPPCDFormOp((read32(loc) & 0x000007fe) >> 1); - if (dFormOp == 0) - error("unrecognized instruction for IE to LE R_PPC_TLS"); - write32(loc, (dFormOp << 26) | (insn & 0x03ff0000) | lo(val)); + unsigned secondaryOp = (read32(loc) & 0x000007fe) >> 1; + uint32_t dFormOp = getPPCDFormOp(secondaryOp); + if (dFormOp == 0) { // Expecting a DS-Form instruction. + dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + error("unrecognized instruction for IE to LE R_PPC_TLS"); + } + write32(loc, (dFormOp | (insn & 0x03ff0000) | lo(val))); break; } default: diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 36b1d0e3c9be..0b6459f852c0 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -37,6 +37,12 @@ enum XFormOpcd { STHX = 407, STWX = 151, STDX = 149, + LHAX = 343, + LWAX = 341, + LFSX = 535, + LFDX = 599, + STFSX = 663, + STFDX = 727, ADD = 266, }; @@ -49,7 +55,6 @@ enum DFormOpcd { LWZ = 32, LWZU = 33, LFSU = 49, - LD = 58, LFDU = 51, STB = 38, STBU = 39, @@ -59,10 +64,20 @@ enum DFormOpcd { STWU = 37, STFSU = 53, STFDU = 55, - STD = 62, + LHA = 42, + LFS = 48, + LFD = 50, + STFS = 52, + STFD = 54, ADDI = 14 }; +enum DSFormOpcd { + LD = 58, + LWA = 58, + STD = 62 +}; + constexpr uint32_t NOP = 0x60000000; enum class PPCLegacyInsn : uint32_t { @@ -825,26 +840,48 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, } } +// Map X-Form instructions to their DS-Form counterparts, if applicable. +// The full encoding is returned here to distinguish between the different +// DS-Form instructions. 
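// Illustrative sketch, not from the upstream change: a PowerPC DS-form
// instruction carries its primary opcode in the top six bits of the word and a
// two-bit extended opcode (XO) in the lowest two bits, which is why ld and lwa
// can share primary opcode 58 and differ only in XO (0 versus 2). Returning the
// fully shifted encoding, as described above, preserves that distinction:
static inline uint32_t encodeDSFormOpcode(uint32_t primaryOp, uint32_t xo) {
  return (primaryOp << 26) | (xo & 0x3);  // register and offset fields are ORed in later
}
// For example: encodeDSFormOpcode(58, 0) selects ld, encodeDSFormOpcode(58, 2)
// selects lwa, and encodeDSFormOpcode(62, 0) selects std.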
+unsigned elf::getPPCDSFormOp(unsigned secondaryOp) { + switch (secondaryOp) { + case LWAX: + return (LWA << 26) | 0x2; + case LDX: + return LD << 26; + case STDX: + return STD << 26; + default: + return 0; + } +} + unsigned elf::getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: - return LBZ; + return LBZ << 26; case LHZX: - return LHZ; + return LHZ << 26; case LWZX: - return LWZ; - case LDX: - return LD; + return LWZ << 26; case STBX: - return STB; + return STB << 26; case STHX: - return STH; + return STH << 26; case STWX: - return STW; - case STDX: - return STD; + return STW << 26; + case LHAX: + return LHA << 26; + case LFSX: + return LFS << 26; + case LFDX: + return LFD << 26; + case STFSX: + return STFS << 26; + case STFDX: + return STFD << 26; case ADD: - return ADDI; + return ADDI << 26; default: return 0; } @@ -898,10 +935,16 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, error("unrecognized instruction for IE to LE R_PPC64_TLS"); uint32_t secondaryOp = (read32(loc) & 0x000007FE) >> 1; // bits 21-30 uint32_t dFormOp = getPPCDFormOp(secondaryOp); - if (dFormOp == 0) - error("unrecognized instruction for IE to LE R_PPC64_TLS"); - write32(loc, ((dFormOp << 26) | (read32(loc) & 0x03FFFFFF))); - relocateNoSym(loc + offset, R_PPC64_TPREL16_LO, val); + uint32_t finalReloc; + if (dFormOp == 0) { // Expecting a DS-Form instruction. + dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + error("unrecognized instruction for IE to LE R_PPC64_TLS"); + finalReloc = R_PPC64_TPREL16_LO_DS; + } else + finalReloc = R_PPC64_TPREL16_LO; + write32(loc, dFormOp | (read32(loc) & 0x03ff0000)); + relocateNoSym(loc + offset, finalReloc, val); } else if (locAsInt % 4 == 1) { // If the offset is not 4 byte aligned then we have a PCRel type reloc. // This version of the relocation is offset by one byte from the @@ -926,9 +969,12 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, } } else { uint32_t dFormOp = getPPCDFormOp(secondaryOp); - if (dFormOp == 0) - errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS"); - write32(loc - 1, ((dFormOp << 26) | (tlsInstr & 0x03FF0000))); + if (dFormOp == 0) { // Expecting a DS-Form instruction. + dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS"); + } + write32(loc - 1, (dFormOp | (tlsInstr & 0x03ff0000))); } } else { errorOrWarn("R_PPC64_TLS must be either 4 byte aligned or one byte " diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 9d4f22dd93f1..47dbe6b4d1c6 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -207,6 +207,7 @@ void processArmCmseSymbols(); void writePPC32GlinkSection(uint8_t *buf, size_t numEntries); unsigned getPPCDFormOp(unsigned secondaryOp); +unsigned getPPCDSFormOp(unsigned secondaryOp); // In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first // is a global entry point (GEP) which typically is used to initialize the TOC diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index d8e34d1e6c74..7e2e07d5f11b 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -107,6 +107,11 @@ COFF Improvements MinGW Improvements ------------------ +* A warning is now printed if the linked module contains runtime pseudo + relocations that are too narrow, that can end up producing runtime + errors if loaded too far away from the referenced DLL in the address + space. 
(`D154777 `_) + MachO Improvements ------------------ diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/GNUstepObjCRuntime/GNUstepObjCRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/GNUstepObjCRuntime/GNUstepObjCRuntime.cpp index fb2656ef1385..39b3e816f4be 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/GNUstepObjCRuntime/GNUstepObjCRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/GNUstepObjCRuntime/GNUstepObjCRuntime.cpp @@ -37,6 +37,33 @@ void GNUstepObjCRuntime::Terminate() { PluginManager::UnregisterPlugin(CreateInstance); } +static bool CanModuleBeGNUstepObjCLibrary(const ModuleSP &module_sp, + const llvm::Triple &TT) { + if (!module_sp) + return false; + const FileSpec &module_file_spec = module_sp->GetFileSpec(); + if (!module_file_spec) + return false; + llvm::StringRef filename = module_file_spec.GetFilename().GetStringRef(); + if (TT.isOSBinFormatELF()) + return filename.starts_with("libobjc.so"); + if (TT.isOSWindows()) + return filename == "objc.dll"; + return false; +} + +static bool ScanForGNUstepObjCLibraryCandidate(const ModuleList &modules, + const llvm::Triple &TT) { + std::lock_guard guard(modules.GetMutex()); + size_t num_modules = modules.GetSize(); + for (size_t i = 0; i < num_modules; i++) { + auto mod = modules.GetModuleAtIndex(i); + if (CanModuleBeGNUstepObjCLibrary(mod, TT)) + return true; + } + return false; +} + LanguageRuntime *GNUstepObjCRuntime::CreateInstance(Process *process, LanguageType language) { if (language != eLanguageTypeObjC) @@ -50,6 +77,9 @@ LanguageRuntime *GNUstepObjCRuntime::CreateInstance(Process *process, return nullptr; const ModuleList &images = target.GetImages(); + if (!ScanForGNUstepObjCLibraryCandidate(images, TT)) + return nullptr; + if (TT.isOSBinFormatELF()) { SymbolContextList eh_pers; RegularExpression regex("__gnustep_objc[x]*_personality_v[0-9]+"); @@ -176,18 +206,8 @@ void GNUstepObjCRuntime::UpdateISAToDescriptorMapIfNeeded() { } bool GNUstepObjCRuntime::IsModuleObjCLibrary(const ModuleSP &module_sp) { - if (!module_sp) - return false; - const FileSpec &module_file_spec = module_sp->GetFileSpec(); - if (!module_file_spec) - return false; - llvm::StringRef filename = module_file_spec.GetFilename().GetStringRef(); const llvm::Triple &TT = GetTargetRef().GetArchitecture().GetTriple(); - if (TT.isOSBinFormatELF()) - return filename.starts_with("libobjc.so"); - if (TT.isOSWindows()) - return filename == "objc.dll"; - return false; + return CanModuleBeGNUstepObjCLibrary(module_sp, TT); } bool GNUstepObjCRuntime::ReadObjCLibrary(const ModuleSP &module_sp) { diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp index 676e450c4846..d306c818e89f 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp @@ -47,6 +47,10 @@ bool RegisterContextPOSIX_arm64::IsPAuth(unsigned reg) const { return m_register_info_up->IsPAuthReg(reg); } +bool RegisterContextPOSIX_arm64::IsTLS(unsigned reg) const { + return m_register_info_up->IsTLSReg(reg); +} + RegisterContextPOSIX_arm64::RegisterContextPOSIX_arm64( lldb_private::Thread &thread, std::unique_ptr register_info) diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h index 7c301599d3af..6a935274fc40 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h +++ 
b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h @@ -55,6 +55,7 @@ class RegisterContextPOSIX_arm64 : public lldb_private::RegisterContext { bool IsSVE(unsigned reg) const; bool IsPAuth(unsigned reg) const; + bool IsTLS(unsigned reg) const; bool IsSVEZ(unsigned reg) const { return m_register_info_up->IsSVEZReg(reg); } bool IsSVEP(unsigned reg) const { return m_register_info_up->IsSVEPReg(reg); } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h index 465d3f5b9f3b..20cfe732c6c2 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -117,6 +117,7 @@ class RegisterInfoPOSIX_arm64 bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); } bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } + bool IsTLSEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); } bool IsSVEReg(unsigned reg) const; bool IsSVEZReg(unsigned reg) const; diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp index 22a9996b1a6e..38abd8f8f2b1 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp @@ -34,6 +34,12 @@ RegisterContextCorePOSIX_arm64::Create(Thread &thread, const ArchSpec &arch, if (pac_data.GetByteSize() >= sizeof(uint64_t) * 2) opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskPAuth); + DataExtractor tls_data = getRegset(notes, arch.GetTriple(), AARCH64_TLS_Desc); + // A valid note will always contain at least one register, "tpidr". It may + // expand in future. 
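The comment above documents that the NT_ARM_TLS note carries at least one 64-bit register, tpidr; once the regset has been attached, the value can be read back with that same layout. A minimal illustrative sketch follows (the helper name is hypothetical and not part of the patch):

    #include "lldb/Utility/DataExtractor.h"
    #include <cstdint>

    // tpidr is the first (and currently the only) register carried by the
    // NT_ARM_TLS core-file note.
    static uint64_t ReadTpidr(const lldb_private::DataExtractor &tls_data) {
      lldb::offset_t offset = 0;
      return tls_data.GetU64(&offset);
    }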
+ if (tls_data.GetByteSize() >= sizeof(uint64_t)) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskTLS); + auto register_info_up = std::make_unique(arch, opt_regsets); return std::unique_ptr( @@ -59,6 +65,9 @@ RegisterContextCorePOSIX_arm64::RegisterContextCorePOSIX_arm64( if (m_register_info_up->IsPAuthEnabled()) m_pac_data = getRegset(notes, target_triple, AARCH64_PAC_Desc); + if (m_register_info_up->IsTLSEnabled()) + m_tls_data = getRegset(notes, target_triple, AARCH64_TLS_Desc); + ConfigureRegisterContext(); } @@ -223,6 +232,11 @@ bool RegisterContextCorePOSIX_arm64::ReadRegister(const RegisterInfo *reg_info, assert(offset < m_pac_data.GetByteSize()); value.SetFromMemoryData(*reg_info, m_pac_data.GetDataStart() + offset, reg_info->byte_size, lldb::eByteOrderLittle, error); + } else if (IsTLS(reg)) { + offset = reg_info->byte_offset - m_register_info_up->GetTLSOffset(); + assert(offset < m_tls_data.GetByteSize()); + value.SetFromMemoryData(*reg_info, m_tls_data.GetDataStart() + offset, + reg_info->byte_size, lldb::eByteOrderLittle, error); } else return false; diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h index f8548562adba..5e0e29f0de7f 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h @@ -57,6 +57,7 @@ class RegisterContextCorePOSIX_arm64 : public RegisterContextPOSIX_arm64 { lldb_private::DataExtractor m_fpr_data; lldb_private::DataExtractor m_sve_data; lldb_private::DataExtractor m_pac_data; + lldb_private::DataExtractor m_tls_data; SVEState m_sve_state; uint16_t m_sve_vector_length = 0; diff --git a/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h b/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h index f6a2fbdcc938..3d53a5795ef3 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h +++ b/lldb/source/Plugins/Process/elf-core/RegisterUtilities.h @@ -123,6 +123,10 @@ constexpr RegsetDesc AARCH64_PAC_Desc[] = { {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_PAC_MASK}, }; +constexpr RegsetDesc AARCH64_TLS_Desc[] = { + {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_TLS}, +}; + constexpr RegsetDesc PPC_VMX_Desc[] = { {llvm::Triple::FreeBSD, llvm::Triple::UnknownArch, llvm::ELF::NT_PPC_VMX}, {llvm::Triple::Linux, llvm::Triple::UnknownArch, llvm::ELF::NT_PPC_VMX}, diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h index 53de2cb74253..4cf1de488c7b 100644 --- a/llvm/include/llvm/ADT/FunctionExtras.h +++ b/llvm/include/llvm/ADT/FunctionExtras.h @@ -59,7 +59,7 @@ namespace detail { template using EnableIfTrivial = - std::enable_if_t::value && + std::enable_if_t::value && std::is_trivially_destructible::value>; template using EnableUnlessSameType = @@ -99,11 +99,11 @@ template class UniqueFunctionBase { template struct AdjustedParamTBase { static_assert(!std::is_reference::value, "references should be handled by template specialization"); - using type = std::conditional_t< - llvm::is_trivially_copy_constructible::value && - llvm::is_trivially_move_constructible::value && - IsSizeLessThanThresholdT::value, - T, T &>; + using type = + std::conditional_t::value && + std::is_trivially_move_constructible::value && + IsSizeLessThanThresholdT::value, + T, T &>; }; // This specialization ensures that 'AdjustedParam&>' or diff --git a/llvm/include/llvm/ADT/SmallVector.h 
b/llvm/include/llvm/ADT/SmallVector.h index 93d94916745d..53a107b1574c 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -326,8 +326,8 @@ class SmallVectorTemplateCommon /// copy these types with memcpy, there is no way for the type to observe this. /// This catches the important case of std::pair, which is not /// trivially assignable. -template ::value) && - (is_trivially_move_constructible::value) && +template ::value) && + (std::is_trivially_move_constructible::value) && std::is_trivially_destructible::value> class SmallVectorTemplateBase : public SmallVectorTemplateCommon { friend class SmallVectorTemplateCommon; diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h index 74591ee25ae5..8b04393dd294 100644 --- a/llvm/include/llvm/Analysis/RegionInfoImpl.h +++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h @@ -254,7 +254,9 @@ void RegionBase::verifyBBInRegion(BlockT *BB) const { if (entry != BB) { for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB), InvBlockTraits::child_end(BB))) { - if (!contains(Pred)) + // Allow predecessors that are unreachable, as these are ignored during + // region analysis. + if (!contains(Pred) && DT->isReachableFromEntry(Pred)) report_fatal_error("Broken region found: edges entering the region must " "go to the entry node!"); } diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 8cab01c2f11d..ba94852d0dbf 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -124,10 +124,6 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, const DominatorTree *DT = nullptr, bool UseInstrInfo = true); -/// Return true if the given instruction is only used in zero comparison -bool isOnlyUsedInZeroComparison(const Instruction *CxtI); - -/// Return true if the given instruction is only used in zero equality comparison bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); /// Return true if the given value is known to be non-zero when defined. 
For diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index ab1219328a5d..b77bcdb89024 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -579,7 +579,7 @@ void CodeGenPassBuilder::addISelPasses(AddIRPass &addPass) const { if (TM.useEmulatedTLS()) addPass(LowerEmuTLSPass()); - addPass(PreISelIntrinsicLoweringPass()); + addPass(PreISelIntrinsicLoweringPass(TM)); derived().addIRPasses(addPass); derived().addCodeGenPrepare(addPass); diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h index 2924f475ac85..a1e778cd718f 100644 --- a/llvm/include/llvm/CodeGen/LowLevelType.h +++ b/llvm/include/llvm/CodeGen/LowLevelType.h @@ -238,10 +238,9 @@ class LLT { return getFieldValue(VectorSizeFieldInfo); else return getFieldValue(PointerVectorSizeFieldInfo); - } else if (IsPointer) - return getFieldValue(PointerSizeFieldInfo); - else - llvm_unreachable("unexpected LLT"); + } + assert(IsPointer && "unexpected LLT"); + return getFieldValue(PointerSizeFieldInfo); } constexpr unsigned getAddressSpace() const { diff --git a/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h index 73d7d779e55b..aa6a0e6935b3 100644 --- a/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h +++ b/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h @@ -18,9 +18,13 @@ namespace llvm { class Module; +class TargetMachine; struct PreISelIntrinsicLoweringPass : PassInfoMixin { + const TargetMachine &TM; + + PreISelIntrinsicLoweringPass(const TargetMachine &TM) : TM(TM) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 817d32ea0ef6..93dfcfc39924 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1044,16 +1044,6 @@ class TargetInstrInfo : public MCInstrInfo { return isCopyInstrImpl(MI); } - bool isFullCopyInstr(const MachineInstr &MI) const { - auto DestSrc = isCopyInstr(MI); - if (!DestSrc) - return false; - - const MachineOperand *DestRegOp = DestSrc->Destination; - const MachineOperand *SrcRegOp = DestSrc->Source; - return !DestRegOp->getSubReg() && !SrcRegOp->getSubReg(); - } - /// If the specific machine instruction is an instruction that adds an /// immediate value and a physical register, and stores the result in /// the given physical register \c Reg, return a pair of the source @@ -1968,13 +1958,6 @@ class TargetInstrInfo : public MCInstrInfo { return false; } - /// Allows targets to use appropriate copy instruction while spilitting live - /// range of a register in register allocation. - virtual unsigned getLiveRangeSplitOpcode(Register Reg, - const MachineFunction &MF) const { - return TargetOpcode::COPY; - } - /// During PHI eleimination lets target to make necessary checks and /// insert the copy to the PHI destination register in a target specific /// manner. 
diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h index 8dd8918ddf21..dfab4c68d18f 100644 --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -104,12 +104,14 @@ class WasmSymbol { struct WasmSection { WasmSection() = default; - uint32_t Type = 0; // Section type (See below) - uint32_t Offset = 0; // Offset with in the file + uint32_t Type = 0; + uint32_t Offset = 0; // Offset within the file StringRef Name; // Section name (User-defined sections only) uint32_t Comdat = UINT32_MAX; // From the "comdat info" section - ArrayRef Content; // Section content - std::vector Relocations; // Relocations for this section + ArrayRef Content; + std::vector Relocations; + // Length of the LEB encoding of the section header's size field + std::optional HeaderSecSizeEncodingLen; }; struct WasmSegment { diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h index 0f6c4f06665f..94ecc2fcfdb5 100644 --- a/llvm/include/llvm/ObjectYAML/WasmYAML.h +++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h @@ -189,6 +189,7 @@ struct Section { SectionType Type; std::vector Relocations; + std::optional HeaderSecSizeEncodingLen; }; struct CustomSection : Section { diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 310c8900af9e..c57bd2350af1 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -299,6 +299,7 @@ class ArgList { /// \p Default if neither option is given. If both the option and its /// negation are present, the last one wins. bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const; + bool hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg, bool Default) const; /// hasFlag - Given an option \p Pos, an alias \p PosAlias and its negative /// form \p Neg, return true if the option or its alias is present, false if diff --git a/llvm/include/llvm/Support/type_traits.h b/llvm/include/llvm/Support/type_traits.h index 86f07c19477d..3fd158def34d 100644 --- a/llvm/include/llvm/Support/type_traits.h +++ b/llvm/include/llvm/Support/type_traits.h @@ -69,21 +69,6 @@ struct const_pointer_or_const_ref>> { }; namespace detail { -/// Internal utility to detect trivial copy construction. -template union copy_construction_triviality_helper { - T t; - copy_construction_triviality_helper() = default; - copy_construction_triviality_helper(const copy_construction_triviality_helper&) = default; - ~copy_construction_triviality_helper() = default; -}; -/// Internal utility to detect trivial move construction. -template union move_construction_triviality_helper { - T t; - move_construction_triviality_helper() = default; - move_construction_triviality_helper(move_construction_triviality_helper&&) = default; - ~move_construction_triviality_helper() = default; -}; - template union trivial_helper { T t; @@ -91,29 +76,6 @@ union trivial_helper { } // end namespace detail -/// An implementation of `std::is_trivially_copy_constructible` since we have -/// users with STLs that don't yet include it. -template -struct is_trivially_copy_constructible - : std::is_copy_constructible< - ::llvm::detail::copy_construction_triviality_helper> {}; -template -struct is_trivially_copy_constructible : std::true_type {}; -template -struct is_trivially_copy_constructible : std::false_type {}; - -/// An implementation of `std::is_trivially_move_constructible` since we have -/// users with STLs that don't yet include it. 
-template -struct is_trivially_move_constructible - : std::is_move_constructible< - ::llvm::detail::move_construction_triviality_helper> {}; -template -struct is_trivially_move_constructible : std::true_type {}; -template -struct is_trivially_move_constructible : std::true_type {}; - - template struct is_copy_assignable { template diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h index 82ab064211d7..028844187584 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -66,13 +66,9 @@ struct ArchInfo { bool isValidArchName(StringRef Arch); bool getArchFeatures(StringRef Arch, std::vector &Features); -bool isValidTuneCPUName(StringRef TuneCPU); -void fillValidTuneCPUList(SmallVectorImpl &Values); +bool isValidCPUName(StringRef TuneCPU); +void fillValidCPUList(SmallVectorImpl &Values); StringRef getDefaultArch(bool Is64Bit); -void setArch(StringRef Arch); -StringRef getArch(); -void setTuneCPU(StringRef TuneCPU); -StringRef getTuneCPU(); } // namespace LoongArch diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h index 3568417510f1..2d76546316fa 100644 --- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h +++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h @@ -8,7 +8,7 @@ /// \file /// /// AggressiveInstCombiner - Combine expression patterns to form expressions -/// with fewer, simple instructions. +/// with fewer, simple instructions. This pass does not modify the CFG. /// //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index d3e5e2591eea..9ce64623e25b 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -20,7 +20,6 @@ namespace llvm { class AAResults; -class AllocaInst; class BatchAAResults; class AssumptionCache; class CallBase; @@ -78,9 +77,6 @@ class MemCpyOptPass : public PassInfoMixin { Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI); - bool performStackMoveOptzn(Instruction *Load, Instruction *Store, - AllocaInst *DestAlloca, AllocaInst *SrcAlloca, - uint64_t Size, BatchAAResults &BAA); void eraseInstruction(Instruction *I); bool iterateOnFunction(Function &F); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5d526858e00e..410f93b1c215 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -261,13 +261,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown); } -bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) { - return !I->user_empty() && all_of(I->users(), [](const User *U) { - ICmpInst::Predicate P; - return match(U, m_ICmp(P, m_Value(), m_Zero())); - }); -} - bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) { return !I->user_empty() && all_of(I->users(), [](const User *U) { ICmpInst::Predicate P; diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 0377bc002067..5a005ba7b414 100644 --- 
a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -97,7 +97,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI, // Trace copies introduced by live range splitting. The inline // spiller can rematerialize through these copies, so the spill // weight must reflect this. - while (TII.isFullCopyInstr(*MI)) { + while (MI->isFullCopy()) { // The copy destination must match the interval register. if (MI->getOperand(0).getReg() != Reg) return false; @@ -224,16 +224,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, continue; NumInstr++; - bool identityCopy = false; - auto DestSrc = TII.isCopyInstr(*MI); - if (DestSrc) { - const MachineOperand *DestRegOp = DestSrc->Destination; - const MachineOperand *SrcRegOp = DestSrc->Source; - identityCopy = DestRegOp->getReg() == SrcRegOp->getReg() && - DestRegOp->getSubReg() == SrcRegOp->getSubReg(); - } - - if (identityCopy || MI->isImplicitDef()) + if (MI->isIdentityCopy() || MI->isImplicitDef()) continue; if (!Visited.insert(MI).second) continue; @@ -267,7 +258,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, } // Get allocation hints from copies. - if (!TII.isCopyInstr(*MI)) + if (!MI->isCopy()) continue; Register HintReg = copyHint(MI, LI.reg(), TRI, MRI); if (!HintReg) diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index 02c67e500bdc..952f454f8f6a 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -226,6 +226,7 @@ class ComplexDeinterleavingGraph { const TargetLowering *TL = nullptr; const TargetLibraryInfo *TLI = nullptr; SmallVector CompositeNodes; + DenseMap, NodePtr> CachedResult; SmallPtrSet FinalInstructions; @@ -292,17 +293,11 @@ class ComplexDeinterleavingGraph { NodePtr submitCompositeNode(NodePtr Node) { CompositeNodes.push_back(Node); + if (Node->Real && Node->Imag) + CachedResult[{Node->Real, Node->Imag}] = Node; return Node; } - NodePtr getContainingComposite(Value *R, Value *I) { - for (const auto &CN : CompositeNodes) { - if (CN->Real == R && CN->Imag == I) - return CN; - } - return nullptr; - } - /// Identifies a complex partial multiply pattern and its rotation, based on /// the following patterns /// @@ -900,9 +895,11 @@ ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) { LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n"); assert(R->getType() == I->getType() && "Real and imaginary parts should not have different types"); - if (NodePtr CN = getContainingComposite(R, I)) { + + auto It = CachedResult.find({R, I}); + if (It != CachedResult.end()) { LLVM_DEBUG(dbgs() << " - Folding to existing node\n"); - return CN; + return It->second; } if (NodePtr CN = identifySplat(R, I)) @@ -949,6 +946,7 @@ ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) { return CN; LLVM_DEBUG(dbgs() << " - Not recognised as a valid pattern.\n"); + CachedResult[{R, I}] = nullptr; return nullptr; } diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index c62f3db9d321..277c6be418c5 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -256,11 +256,11 @@ Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass, // This minimizes register pressure and maximizes the store-to-load distance for // spill slots which can be important in tight loops. 
-/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, -/// otherwise return 0. -static Register isCopyOf(const MachineInstr &MI, Register Reg, - const TargetInstrInfo &TII) { - if (!TII.isCopyInstr(MI)) +/// If MI is a COPY to or from Reg, return the other register, otherwise return +/// 0. +static Register isCopyOf(const MachineInstr &MI, Register Reg) { + assert(!MI.isBundled()); + if (!MI.isCopy()) return Register(); const MachineOperand &DstOp = MI.getOperand(0); @@ -277,10 +277,9 @@ static Register isCopyOf(const MachineInstr &MI, Register Reg, } /// Check for a copy bundle as formed by SplitKit. -static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg, - const TargetInstrInfo &TII) { +static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg) { if (!FirstMI.isBundled()) - return isCopyOf(FirstMI, Reg, TII); + return isCopyOf(FirstMI, Reg); assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() && "expected to see first instruction in bundle"); @@ -289,12 +288,11 @@ static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg, MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator(); while (I->isBundledWithSucc()) { const MachineInstr &MI = *I; - auto CopyInst = TII.isCopyInstr(MI); - if (!CopyInst) + if (!MI.isCopy()) return Register(); - const MachineOperand &DstOp = *CopyInst->Destination; - const MachineOperand &SrcOp = *CopyInst->Source; + const MachineOperand &DstOp = MI.getOperand(0); + const MachineOperand &SrcOp = MI.getOperand(1); if (DstOp.getReg() == Reg) { if (!SnipReg) SnipReg = SrcOp.getReg(); @@ -360,7 +358,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { MachineInstr &MI = *RI++; // Allow copies to/from Reg. - if (isCopyOfBundle(MI, Reg, TII)) + if (isCopyOfBundle(MI, Reg)) continue; // Allow stack slot loads. @@ -398,7 +396,7 @@ void InlineSpiller::collectRegsToSpill() { return; for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) { - Register SnipReg = isCopyOfBundle(MI, Reg, TII); + Register SnipReg = isCopyOfBundle(MI, Reg); if (!isSibling(SnipReg)) continue; LiveInterval &SnipLI = LIS.getInterval(SnipReg); @@ -521,14 +519,14 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Find all spills and copies of VNI. for (MachineInstr &MI : llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) { - if (!MI.mayStore() && !TII.isCopyInstr(MI)) + if (!MI.isCopy() && !MI.mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); if (LI->getVNInfoAt(Idx) != VNI) continue; // Follow sibling copies down the dominator tree. - if (Register DstReg = isCopyOfBundle(MI, Reg, TII)) { + if (Register DstReg = isCopyOfBundle(MI, Reg)) { if (isSibling(DstReg)) { LiveInterval &DstLI = LIS.getInterval(DstReg); VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); @@ -872,7 +870,7 @@ foldMemoryOperand(ArrayRef> Ops, if (Ops.back().first != MI || MI->isBundled()) return false; - bool WasCopy = TII.isCopyInstr(*MI).has_value(); + bool WasCopy = MI->isCopy(); Register ImpReg; // TII::foldMemoryOperand will do what we need here for statepoint @@ -1157,7 +1155,7 @@ void InlineSpiller::spillAroundUses(Register Reg) { Idx = VNI->def; // Check for a sibling copy. - Register SibReg = isCopyOfBundle(MI, Reg, TII); + Register SibReg = isCopyOfBundle(MI, Reg); if (SibReg && isSibling(SibReg)) { // This may actually be a copy between snippets. 
if (isRegToSpill(SibReg)) { diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index ff49e080090c..c3477cd8ce34 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -352,8 +352,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // unlikely to change anything. We typically don't want to shrink the // PIC base register that has lots of uses everywhere. // Always shrink COPY uses that probably come from live range splitting. - if ((MI->readsVirtualRegister(Reg) && - (MO.isDef() || TII.isCopyInstr(*MI))) || + if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) || (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO)))) ToShrink.insert(&LI); else if (MO.readsReg()) diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp index af7d6c4403b8..93f5314539cd 100644 --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -110,7 +109,6 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { return false; MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); @@ -199,7 +197,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { // is because it needs more accurate model to handle register // pressure correctly. MachineInstr &DefInstr = *MRI.def_instr_begin(Reg); - if (!TII.isCopyInstr(DefInstr)) + if (!DefInstr.isCopy()) NumEligibleUse++; Insert = FindDominatedInstruction(DefInstr, Insert, IOM); } else { diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 4e80e9b58c06..523e077fd9a2 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -538,6 +538,10 @@ void MachineLICMBase::HoistRegionPostRA() { PhysRegDefs.set(*AI); } + // Funclet entry blocks will clobber all registers + if (const uint32_t *Mask = BB->getBeginClobberMask(TRI)) + PhysRegClobbers.setBitsNotInMask(Mask); + SpeculationState = SpeculateUnknown; for (MachineInstr &MI : *BB) ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 3448c56e4994..5b822b5d7b95 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -14,9 +14,10 @@ #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/ObjCARCUtil.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -26,6 +27,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" using namespace llvm; @@ -41,19 +43,19 @@ static cl::opt MemIntrinsicExpandSizeThresholdOpt( namespace { struct 
PreISelIntrinsicLowering { + const TargetMachine &TM; const function_ref LookupTTI; - const function_ref LookupLibInfo; /// If this is true, assume it's preferably to leave memory intrinsic calls /// for replacement with a library call later. Otherwise this depends on - /// TargetLibraryInfo availability of the corresponding function. + /// TargetLoweringInfo availability of the corresponding function. const bool UseMemIntrinsicLibFunc; explicit PreISelIntrinsicLowering( + const TargetMachine &TM_, function_ref LookupTTI_, - function_ref LookupLibInfo_, bool UseMemIntrinsicLibFunc_ = true) - : LookupTTI(LookupTTI_), LookupLibInfo(LookupLibInfo_), + : TM(TM_), LookupTTI(LookupTTI_), UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {} static bool shouldExpandMemIntrinsicWithSize(Value *Size, @@ -187,6 +189,13 @@ bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize( return SizeVal > Threshold || Threshold == 0; } +static bool canEmitLibcall(const TargetMachine &TM, Function *F, + RTLIB::Libcall LC) { + // TODO: Should this consider the address space of the memcpy? + const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); + return TLI->getLibcallName(LC) != nullptr; +} + // TODO: Handle atomic memcpy and memcpy.inline // TODO: Pass ScalarEvolution bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { @@ -203,9 +212,10 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) { if (UseMemIntrinsicLibFunc && - LookupLibInfo(*ParentFunc).has(LibFunc_memcpy)) + canEmitLibcall(TM, ParentFunc, RTLIB::MEMCPY)) break; + // TODO: For optsize, emit the loop into a separate function expandMemCpyAsLoop(Memcpy, TTI); Changed = true; Memcpy->eraseFromParent(); @@ -219,7 +229,7 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) { if (UseMemIntrinsicLibFunc && - LookupLibInfo(*ParentFunc).has(LibFunc_memmove)) + canEmitLibcall(TM, ParentFunc, RTLIB::MEMMOVE)) break; if (expandMemMoveAsLoop(Memmove, TTI)) { @@ -236,7 +246,7 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const { const TargetTransformInfo &TTI = LookupTTI(*ParentFunc); if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) { if (UseMemIntrinsicLibFunc && - LookupLibInfo(*Memset->getFunction()).has(LibFunc_memset)) + canEmitLibcall(TM, ParentFunc, RTLIB::MEMSET)) break; expandMemSetAsLoop(Memset); @@ -357,8 +367,8 @@ class PreISelIntrinsicLoweringLegacyPass : public ModulePass { PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); AU.addRequired(); + AU.addRequired(); } bool runOnModule(Module &M) override { @@ -366,11 +376,8 @@ class PreISelIntrinsicLoweringLegacyPass : public ModulePass { return this->getAnalysis().getTTI(F); }; - auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis().getTLI(F); - }; - - PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI); + const auto &TM = getAnalysis().getTM(); + PreISelIntrinsicLowering Lowering(TM, LookupTTI); return Lowering.lowerIntrinsics(M); } }; @@ -379,27 +386,28 @@ class PreISelIntrinsicLoweringLegacyPass : public ModulePass { char PreISelIntrinsicLoweringLegacyPass::ID; 
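Since the pass now carries a TargetMachine (so libcall availability is answered by TargetLowering rather than TargetLibraryInfo), constructing it outside CodeGenPassBuilder changes accordingly. A minimal sketch under the new pass manager, assuming a TargetMachine reference is already in scope; the wrapper function name is hypothetical:

    #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
    #include "llvm/IR/PassManager.h"

    // Mirrors the CodeGenPassBuilder change above: the pass is handed the
    // TargetMachine at construction time.
    void addPreISelLowering(llvm::ModulePassManager &MPM,
                            const llvm::TargetMachine &TM) {
      MPM.addPass(llvm::PreISelIntrinsicLoweringPass(TM));
    }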
-INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass, - "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering", - false, false) +INITIALIZE_PASS_BEGIN(PreISelIntrinsicLoweringLegacyPass, + "pre-isel-intrinsic-lowering", + "Pre-ISel Intrinsic Lowering", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(PreISelIntrinsicLoweringLegacyPass, + "pre-isel-intrinsic-lowering", + "Pre-ISel Intrinsic Lowering", false, false) ModulePass *llvm::createPreISelIntrinsicLoweringPass() { - return new PreISelIntrinsicLoweringLegacyPass; + return new PreISelIntrinsicLoweringLegacyPass(); } PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); - auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { - return FAM.getResult(F); - }; - auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult(F); }; - PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI); + PreISelIntrinsicLowering Lowering(TM, LookupTTI); if (!Lowering.lowerIntrinsics(M)) return PreservedAnalyses::all(); else diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 68f6ea3268a9..48187e575494 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1282,12 +1282,10 @@ static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, /// VirtReg. static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, - const TargetRegisterInfo *TRI, SlotIndex Use, - const TargetInstrInfo *TII) { + const TargetRegisterInfo *TRI, SlotIndex Use) { // Early check the common case. - auto DestSrc = TII->isCopyInstr(*MI); - if (DestSrc && - DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg()) + if (MI->isCopy() && + MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg()) return false; // FIXME: We're only considering uses, but should be consider defs too? @@ -1346,14 +1344,14 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, // the allocation. for (const SlotIndex Use : Uses) { if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) { - if (TII->isFullCopyInstr(*MI) || + if (MI->isFullCopy() || (SplitSubClass && SuperRCNumAllocatableRegs == getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, TII, TRI, RegClassInfo)) || // TODO: Handle split for subranges with subclass constraints? (!SplitSubClass && VirtReg.hasSubRanges() && - !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) { + !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) { LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); continue; } @@ -2140,7 +2138,7 @@ void RAGreedy::initializeCSRCost() { /// \p Out is not cleared before being populated. void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) { for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { - if (!TII->isFullCopyInstr(Instr)) + if (!Instr.isFullCopy()) continue; // Look for the other end of the copy. 
Register OtherReg = Instr.getOperand(0).getReg(); @@ -2455,10 +2453,9 @@ RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) { MI.getOpcode() == TargetOpcode::STATEPOINT; }; for (MachineInstr &MI : MBB) { - auto DestSrc = TII->isCopyInstr(MI); - if (DestSrc) { - const MachineOperand &Dest = *DestSrc->Destination; - const MachineOperand &Src = *DestSrc->Source; + if (MI.isCopy()) { + const MachineOperand &Dest = MI.getOperand(0); + const MachineOperand &Src = MI.getOperand(1); Register SrcReg = Src.getReg(); Register DestReg = Dest.getReg(); // Only count `COPY`s with a virtual register as source or destination. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index de909cc10795..235f0da86b90 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20492,9 +20492,11 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { SDValue Elt = Value.getOperand(1); SDValue Idx = Value.getOperand(2); - // If the element isn't byte sized then we can't compute an offset + // If the element isn't byte sized or is implicitly truncated then we can't + // compute an offset. EVT EltVT = Elt.getValueType(); - if (!EltVT.isByteSized()) + if (!EltVT.isByteSized() || + EltVT != Value.getOperand(0).getValueType().getVectorElementType()) return SDValue(); auto *Ld = dyn_cast(Value.getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5c1b19eba1c1..30d202494320 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1945,6 +1945,9 @@ SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm, assert(MulImm.getBitWidth() == VT.getSizeInBits() && "APInt size does not match type size!"); + if (MulImm == 0) + return getConstant(0, DL, VT); + if (ConstantFold) { const MachineFunction &MF = getMachineFunction(); auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9595da9d0d8a..20c37eb4cb11 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4156,6 +4156,18 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects()); } +static const MDNode *getRangeMetadata(const Instruction &I) { + // If !noundef is not present, then !range violation results in a poison + // value rather than immediate undefined behavior. In theory, transferring + // these annotations to SDAG is fine, but in practice there are key SDAG + // transforms that are known not to be poison-safe, such as folding logical + // and/or to bitwise and/or. For now, only transfer !range if !noundef is + // also present. 
+ if (!I.hasMetadata(LLVMContext::MD_noundef)) + return nullptr; + return I.getMetadata(LLVMContext::MD_range); +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -4180,7 +4192,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); SmallVector ValueVTs, MemVTs; - SmallVector Offsets; + SmallVector Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) @@ -4188,7 +4200,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Align Alignment = I.getAlign(); AAMDNodes AAInfo = I.getAAMetadata(); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(I); bool isVolatile = I.isVolatile(); MachineMemOperand::Flags MMOFlags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); @@ -4219,14 +4231,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); - // An aggregate load cannot wrap around the address space, so offsets to its - // parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - SmallVector Values(NumValues); SmallVector Chains(std::min(MaxParallelChains, NumValues)); - EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -4243,13 +4249,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, dl, - PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), - Flags); - SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, - MachinePointerInfo(SV, Offsets[i]), Alignment, + // TODO: MachinePointerInfo only supports a fixed length offset. + MachinePointerInfo PtrInfo = + !Offsets[i].isScalable() || Offsets[i].isZero() + ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue()) + : MachinePointerInfo(); + + SDValue A = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]); + SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, PtrInfo, Alignment, MMOFlags, AAInfo, Ranges); Chains[ChainI] = L.getValue(1); @@ -4351,7 +4359,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { } SmallVector ValueVTs, MemVTs; - SmallVector Offsets; + SmallVector Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0); unsigned NumValues = ValueVTs.size(); @@ -4372,11 +4380,6 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); - // An aggregate load cannot wrap around the address space, so offsets to its - // parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. @@ -4386,14 +4389,19 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Root = Chain; ChainI = 0; } - SDValue Add = - DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags); + + // TODO: MachinePointerInfo only supports a fixed length offset. + MachinePointerInfo PtrInfo = + !Offsets[i].isScalable() || Offsets[i].isZero() + ? 
MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue()) + : MachinePointerInfo(); + + SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); SDValue St = - DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]), - Alignment, MMOFlags, AAInfo); + DAG.getStore(Root, dl, Val, Add, PtrInfo, Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } @@ -4607,7 +4615,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo = I.getAAMetadata(); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(I); // Do not serialize masked loads of constant memory with anything. MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); @@ -4641,7 +4649,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { ->getMaybeAlignValue() .value_or(DAG.getEVTAlign(VT.getScalarType())); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(I); SDValue Root = DAG.getRoot(); SDValue Base; @@ -7396,7 +7404,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } SDValue TripCount = getValue(I.getOperand(1)); - auto VecTy = CCVT.changeVectorElementType(ElementVT); + EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElementVT, + CCVT.getVectorElementCount()); SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index); SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount); @@ -7645,7 +7654,7 @@ void SelectionDAGBuilder::visitVPLoad( Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); - const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(VPIntrin); SDValue LD; // Do not serialize variable-length loads of constant memory with // anything. @@ -7672,7 +7681,7 @@ void SelectionDAGBuilder::visitVPGather( Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); - const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(VPIntrin); SDValue LD; if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); @@ -7779,7 +7788,7 @@ void SelectionDAGBuilder::visitVPStridedLoad( if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); - const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(VPIntrin); MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); @@ -9626,7 +9635,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op) { - const MDNode *Range = I.getMetadata(LLVMContext::MD_range); + const MDNode *Range = getRangeMetadata(I); if (!Range) return Op; @@ -11310,8 +11319,32 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - if (FallthroughUnreachable) - JTH->FallthroughUnreachable = true; + // If the default clause is unreachable, propagate that knowledge into + // JTH->FallthroughUnreachable which will use it to suppress the range + // check. + // + // However, don't do this if we're doing branch target enforcement, + // because a table branch _without_ a range check can be a tempting JOP + // gadget - out-of-bounds inputs that are impossible in correct + // execution become possible again if an attacker can influence the + // control flow. So if an attacker doesn't already have a BTI bypass + // available, we don't want them to be able to get one out of this + // table branch. + if (FallthroughUnreachable) { + Function &CurFunc = CurMF->getFunction(); + bool HasBranchTargetEnforcement = false; + if (CurFunc.hasFnAttribute("branch-target-enforcement")) { + HasBranchTargetEnforcement = + CurFunc.getFnAttribute("branch-target-enforcement") + .getValueAsBool(); + } else { + HasBranchTargetEnforcement = + CurMF->getMMI().getModule()->getModuleFlag( + "branch-target-enforcement"); + } + if (!HasBranchTargetEnforcement) + JTH->FallthroughUnreachable = true; + } if (!JTH->FallthroughUnreachable) addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 83964eced597..eee54f09fbad 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -514,10 +514,10 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) { VFP = ValueForcePair(nullptr, true); } -SlotIndex SplitEditor::buildSingleSubRegCopy( - Register FromReg, Register ToReg, MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, unsigned SubIdx, - LiveInterval &DestLI, bool Late, SlotIndex Def, const MCInstrDesc &Desc) { +SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg, + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) { + const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); bool FirstCopy = !Def.isValid(); MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc) .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy) @@ -536,8 +536,7 @@ SlotIndex SplitEditor::buildSingleSubRegCopy( SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, LaneBitmask LaneMask, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) { - const MCInstrDesc &Desc = - TII.get(TII.getLiveRangeSplitOpcode(FromReg, *MBB.getParent())); + const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); SlotIndexes &Indexes = *LIS.getSlotIndexes(); if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) { // The full vreg is copied. 
@@ -565,7 +564,7 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, SlotIndex Def; for (unsigned BestIdx : SubIndexes) { Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx, - DestLI, Late, Def, Desc); + DestLI, Late, Def); } BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); @@ -1585,9 +1584,7 @@ bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI, if (BI.LiveIn && BI.LiveOut) return true; // No point in isolating a copy. It has no register class constraints. - MachineInstr *MI = LIS.getInstructionFromIndex(BI.FirstInstr); - bool copyLike = TII.isCopyInstr(*MI) || MI->isSubregToReg(); - if (copyLike) + if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike()) return false; // Finally, don't isolate an end point that was created by earlier splits. return isOriginalEndpoint(BI.FirstInstr); diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index 1174e392e4e4..f764ffd4750c 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -428,11 +428,8 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor { bool Late, unsigned RegIdx); SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg, - MachineBasicBlock &MB, - MachineBasicBlock::iterator InsertBefore, - unsigned SubIdx, LiveInterval &DestLI, - bool Late, SlotIndex Def, - const MCInstrDesc &Desc); + MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore, + unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def); public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 09dcddc17b06..b29404b42519 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -440,9 +440,8 @@ MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB, // If the COPY instruction in MI can be folded to a stack operation, return // the register class to use. static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, - const TargetInstrInfo &TII, unsigned FoldIdx) { - assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction"); + assert(MI.isCopy() && "MI must be a COPY instruction"); if (MI.getNumOperands() != 2) return nullptr; assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); @@ -631,10 +630,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, } // Straight COPY may fold as load/store. - if (!isCopyInstr(MI) || Ops.size() != 1) + if (!MI.isCopy() || Ops.size() != 1) return nullptr; - const TargetRegisterClass *RC = canFoldCopy(MI, *this, Ops[0]); + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); if (!RC) return nullptr; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index badb7fe53333..68a4616fe4b8 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -921,7 +921,7 @@ void TargetLoweringBase::initActions() { // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. setOperationAction(ISD::ConstantFP, - {MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, + {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, Expand); // These library functions default to expand. 
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 647f570ab807..55fb522554fa 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -2426,23 +2426,6 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( Name, Kind, XCOFF::CsectProperties(SMC, XCOFF::XTY_CM)); } - if (Kind.isMergeableCString()) { - Align Alignment = GO->getParent()->getDataLayout().getPreferredAlign( - cast(GO)); - - unsigned EntrySize = getEntrySizeForKind(Kind); - std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; - SmallString<128> Name; - Name = SizeSpec + utostr(Alignment.value()); - - if (TM.getDataSections()) - getNameWithPrefix(Name, GO, TM); - - return getContext().getXCOFFSection( - Name, Kind, XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD), - /* MultiSymbolsAllowed*/ !TM.getDataSections()); - } - if (Kind.isText()) { if (TM.getFunctionSections()) { return cast(getFunctionEntryPointSymbol(GO, TM)) diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 6803d6ab1285..bc8abb751221 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -184,7 +184,6 @@ void llvm::computeLTOCacheKey( } const ModuleHash &getHash() const { return ModInfo->second.second; } - uint64_t getId() const { return ModInfo->second.first; } }; std::vector ImportModulesVector; @@ -194,11 +193,11 @@ void llvm::computeLTOCacheKey( ++It) { ImportModulesVector.push_back({It, Index.getModule(It->getKey())}); } - // Order using moduleId integer which is based on the order the module was - // added. + // Order using module hash, to be both independent of module name and + // module order. llvm::sort(ImportModulesVector, [](const ImportModule &Lhs, const ImportModule &Rhs) -> bool { - return Lhs.getId() < Rhs.getId(); + return Lhs.getHash() < Rhs.getHash(); }); for (const ImportModule &Entry : ImportModulesVector) { auto ModHash = Entry.getHash(); diff --git a/llvm/lib/ObjCopy/wasm/WasmObject.h b/llvm/lib/ObjCopy/wasm/WasmObject.h index 9bc5831926c6..f860ec697e56 100644 --- a/llvm/lib/ObjCopy/wasm/WasmObject.h +++ b/llvm/lib/ObjCopy/wasm/WasmObject.h @@ -23,6 +23,7 @@ struct Section { // For now, each section is only an opaque binary blob with no distinction // between custom and known sections. uint8_t SectionType; + std::optional HeaderSecSizeEncodingLen; StringRef Name; ArrayRef Contents; }; diff --git a/llvm/lib/ObjCopy/wasm/WasmReader.cpp b/llvm/lib/ObjCopy/wasm/WasmReader.cpp index 6e7d8b5591c9..578e78955af3 100644 --- a/llvm/lib/ObjCopy/wasm/WasmReader.cpp +++ b/llvm/lib/ObjCopy/wasm/WasmReader.cpp @@ -22,8 +22,8 @@ Expected> Reader::create() const { Obj->Sections.reserve(WasmObj.getNumSections()); for (const SectionRef &Sec : WasmObj.sections()) { const WasmSection &WS = WasmObj.getWasmSection(Sec); - Obj->Sections.push_back( - {static_cast(WS.Type), WS.Name, WS.Content}); + Obj->Sections.push_back({static_cast(WS.Type), + WS.HeaderSecSizeEncodingLen, WS.Name, WS.Content}); // Give known sections standard names to allow them to be selected. (Custom // sections already have their names filled in by the parser). 
Section &ReaderSec = Obj->Sections.back(); diff --git a/llvm/lib/ObjCopy/wasm/WasmWriter.cpp b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp index fdcd441cc798..bfab25ce8097 100644 --- a/llvm/lib/ObjCopy/wasm/WasmWriter.cpp +++ b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp @@ -29,16 +29,19 @@ Writer::SectionHeader Writer::createSectionHeader(const Section &S, SectionSize = S.Contents.size(); if (HasName) SectionSize += getULEB128Size(S.Name.size()) + S.Name.size(); - // Pad the LEB value out to 5 bytes to make it a predictable size, and - // match the behavior of clang. - encodeULEB128(SectionSize, OS, 5); + // If we read this section from an object file, use its original size for the + // padding of the LEB value to avoid changing the file size. Otherwise, pad + // out to 5 bytes to make it predictable, and match the behavior of clang. + unsigned HeaderSecSizeEncodingLen = + S.HeaderSecSizeEncodingLen ? *S.HeaderSecSizeEncodingLen : 5; + encodeULEB128(SectionSize, OS, HeaderSecSizeEncodingLen); if (HasName) { encodeULEB128(S.Name.size(), OS); OS << S.Name; } // Total section size is the content size plus 1 for the section type and - // 5 for the LEB-encoded size. - SectionSize = SectionSize + 1 + 5; + // the LEB-encoded size. + SectionSize = SectionSize + 1 + HeaderSecSizeEncodingLen; return Header; } diff --git a/llvm/lib/Object/SymbolSize.cpp b/llvm/lib/Object/SymbolSize.cpp index e42dbe6f47ab..eee5505b8c14 100644 --- a/llvm/lib/Object/SymbolSize.cpp +++ b/llvm/lib/Object/SymbolSize.cpp @@ -84,16 +84,21 @@ llvm::object::computeSymbolSizes(const ObjectFile &O) { array_pod_sort(Addresses.begin(), Addresses.end(), compareAddress); - // Compute the size as the gap to the next symbol - for (unsigned I = 0, N = Addresses.size() - 1; I < N; ++I) { + // Compute the size as the gap to the next symbol. If multiple symbols have + // the same address, give both the same size. Because Addresses is sorted, + // using two pointers to keep track of the current symbol vs. the next symbol + // that doesn't have the same address for size computation. + for (unsigned I = 0, NextI = 0, N = Addresses.size() - 1; I < N; ++I) { auto &P = Addresses[I]; if (P.I == O.symbol_end()) continue; - // If multiple symbol have the same address, give both the same size. - unsigned NextI = I + 1; - while (NextI < N && Addresses[NextI].Address == P.Address) - ++NextI; + // If the next pointer is behind, update it to the next symbol. + if (NextI <= I) { + NextI = I + 1; + while (NextI < N && Addresses[NextI].Address == P.Address) + ++NextI; + } uint64_t Size = Addresses[NextI].Address - P.Address; P.Address = Size; diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index a72242bc4ac2..11b9b579a8d7 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -268,7 +268,11 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx, Section.Offset = Ctx.Ptr - Ctx.Start; Section.Type = readUint8(Ctx); LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n"); + // When reading the section's size, store the size of the LEB used to encode + // it. This allows objcopy/strip to reproduce the binary identically. 
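The SymbolSize.cpp hunk above sizes each symbol as the gap to the next distinct address, gives symbols that share an address the same size, and advances a second cursor only when it falls behind, so the sorted array is walked once. A standalone sketch of that two-cursor scan over a plain array; unlike the real code there is no section-end sentinel, so the final run of symbols gets size 0.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct SymEntry {
  uint64_t Address;
  const char *Name;
};

// Size each symbol as the gap to the next distinct address. Symbols sharing
// an address all get that same gap; NextI is only recomputed when it falls
// behind the main cursor, keeping the pass over the sorted array linear.
static std::vector<uint64_t> computeSizes(std::vector<SymEntry> Syms) {
  std::sort(Syms.begin(), Syms.end(),
            [](const SymEntry &L, const SymEntry &R) {
              return L.Address < R.Address;
            });
  std::vector<uint64_t> Sizes(Syms.size(), 0);
  for (size_t I = 0, NextI = 0, N = Syms.size(); I < N; ++I) {
    if (NextI <= I) {
      NextI = I + 1;
      while (NextI < N && Syms[NextI].Address == Syms[I].Address)
        ++NextI;
    }
    if (NextI < N)
      Sizes[I] = Syms[NextI].Address - Syms[I].Address;
  }
  return Sizes;
}

int main() {
  std::vector<uint64_t> Sizes =
      computeSizes({{0x10, "a"}, {0x10, "a.alias"}, {0x40, "b"}});
  for (uint64_t S : Sizes)
    std::printf("%llu ", (unsigned long long)S); // 48 48 0
  std::printf("\n");
  return 0;
}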
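The wasm reader and writer hunks around this point cooperate: the reader records how many bytes the original section-size ULEB128 occupied, and the objcopy writer re-encodes the size with the same padding (falling back to the 5-byte form clang emits) so the output can be byte-identical to the input; the YAML emitter hunk further down additionally rejects a stored width too small for the new size. A standalone sketch of fixed-width ULEB128 encoding plus a decoder that reports the encoded length; the helper names are illustrative, not LLVM's support APIs.

#include <cstdint>
#include <cstdio>
#include <vector>

// Encode Value as ULEB128. If PadTo is non-zero, emit continuation bytes
// until the encoding is exactly PadTo bytes wide, mirroring the fixed-width
// section-size fields in wasm binaries.
static unsigned writeULEB128(uint64_t Value, std::vector<uint8_t> &Out,
                             unsigned PadTo = 0) {
  unsigned Count = 0;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || Count + 1 < PadTo)
      Byte |= 0x80; // mark that more bytes follow
    Out.push_back(Byte);
    ++Count;
  } while (Value != 0 || Count < PadTo);
  return Count;
}

// Decode a ULEB128 and report how many bytes the encoding used, which is
// what a rewriter needs in order to reproduce the original padding.
static uint64_t readULEB128(const uint8_t *P, unsigned &Len) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  Len = 0;
  uint8_t Byte;
  do {
    Byte = P[Len++];
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return Value;
}

int main() {
  std::vector<uint8_t> Buf;
  writeULEB128(3, Buf, 5); // 0x83 0x80 0x80 0x80 0x00: five bytes for "3"
  unsigned Len = 0;
  uint64_t Value = readULEB128(Buf.data(), Len);
  std::printf("value=%llu encoded-length=%u\n", (unsigned long long)Value, Len);
  return 0;
}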
+ const uint8_t *PreSizePtr = Ctx.Ptr; uint32_t Size = readVaruint32(Ctx); + Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr; if (Size == 0) return make_error("zero length section", object_error::parse_failed); diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp index 6230312eff7b..9b8fd11f8543 100644 --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -646,8 +646,18 @@ bool WasmWriter::writeWasm(raw_ostream &OS) { StringStream.flush(); + unsigned HeaderSecSizeEncodingLen = + Sec->HeaderSecSizeEncodingLen ? *Sec->HeaderSecSizeEncodingLen : 5; + unsigned RequiredLen = getULEB128Size(OutString.size()); + // Wasm spec does not allow LEBs larger than 5 bytes + assert(RequiredLen <= 5); + if (HeaderSecSizeEncodingLen < RequiredLen) { + reportError("section header length can't be encoded in a LEB of size " + + Twine(HeaderSecSizeEncodingLen)); + return false; + } // Write the section size followed by the content - encodeULEB128(OutString.size(), OS); + encodeULEB128(OutString.size(), OS, HeaderSecSizeEncodingLen); OS << OutString; } diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp index 7ca422487df2..ef47766a2394 100644 --- a/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/llvm/lib/ObjectYAML/WasmYAML.cpp @@ -45,6 +45,7 @@ void MappingTraits::mapping(IO &IO, static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) { IO.mapRequired("Type", Section.Type); IO.mapOptional("Relocations", Section.Relocations); + IO.mapOptional("HeaderSecSizeEncodingLen", Section.HeaderSecSizeEncodingLen); } static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) { diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp index 400bedabc003..86f28e578e5d 100644 --- a/llvm/lib/Option/ArgList.cpp +++ b/llvm/lib/Option/ArgList.cpp @@ -75,6 +75,13 @@ bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const { return Default; } +bool ArgList::hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg, + bool Default) const { + if (Arg *A = getLastArgNoClaim(Pos, Neg)) + return A->getOption().matches(Pos); + return Default; +} + bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg, bool Default) const { if (Arg *A = getLastArg(Pos, PosAlias, Neg)) diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 759e15f4c443..a371bd21f026 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -593,10 +593,11 @@ bool TGParser::resolveArguments(Record *Rec, ArrayRef ArgValues, for (auto *UnsolvedArgName : UnsolvedArgNames) { Init *Default = Rec->getValue(UnsolvedArgName)->getValue(); if (!Default->isComplete()) { - return Error(Loc, "value not specified for template argument (" + - UnsolvedArgName->getAsUnquotedString() + - ") of multiclass '" + Rec->getNameInitAsString() + - "'"); + std::string Name = UnsolvedArgName->getAsUnquotedString(); + Error(Loc, "value not specified for template argument '" + Name + "'"); + PrintNote(Rec->getFieldLoc(Name), + "declared in '" + Rec->getNameInitAsString() + "'"); + return true; } ArgValueHandler(UnsolvedArgName, Default); } diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 05adbe27c948..8f50af4b71fd 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -148,6 +148,9 @@ def FeatureExperimentalZeroingPseudos def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl", "UseScalarIncVL", 
"true", "Prefer inc/dec over add+cnt">; +def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r", + "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">; + def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true", "Enable Scalable Vector Extension 2 (SVE2) instructions (FEAT_SVE2)", [FeatureSVE, FeatureUseScalarIncVL]>; @@ -1137,7 +1140,8 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1 FeatureLSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, - FeaturePredictableSelectIsExpensive]>; + FeaturePredictableSelectIsExpensive, + FeatureNoSVEFPLD1R]>; def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2", "Neoverse V2 ARM processors", [ diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index d66800664c0c..4d5676f34101 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1908,6 +1908,7 @@ static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB, return; const AArch64Subtarget &Subtarget = MF.getSubtarget(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + bool EmitAsyncCFI = MFI.needsAsyncDwarfUnwindInfo(MF); MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc DL; @@ -1933,11 +1934,13 @@ static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB, TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP)) .setMIFlag(MachineInstr::FrameDestroy); - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameDestroy); + if (EmitAsyncCFI) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameDestroy); + } if (NeedsWinCFI) { *HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR)) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 13df87af6c7b..0605dfa63793 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1091,6 +1091,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (Subtarget->hasFullFP16()) { setOperationAction(ISD::ConstantFP, MVT::f16, Legal); + setOperationAction(ISD::ConstantFP, MVT::bf16, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); @@ -9757,7 +9758,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero(); else if (VT == MVT::f32) IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero(); - else if (VT == MVT::f16) + else if (VT == MVT::f16 || VT == MVT::bf16) IsLegal = (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) || Imm.isPosZero(); @@ -15479,15 +15480,15 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, if (!AM.HasBaseReg) return false; - // FIXME: Update this method to support scalable addressing modes. 
- if (Ty->isScalableTargetExtTy()) - return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale; + if (Ty->isScalableTy()) { + if (isa(Ty)) { + uint64_t VecElemNumBytes = + DL.getTypeSizeInBits(cast(Ty)->getElementType()) / 8; + return AM.HasBaseReg && !AM.BaseOffs && + (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes); + } - if (isa(Ty)) { - uint64_t VecElemNumBytes = - DL.getTypeSizeInBits(cast(Ty)->getElementType()) / 8; - return AM.HasBaseReg && !AM.BaseOffs && - (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes); + return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale; } // check reg + imm case: diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index cd2b9df27a24..39135df285c2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1306,6 +1306,11 @@ def fpimm16 : Operand, let PrintMethod = "printFPImmOperand"; } +def fpimmbf16 : Operand, + FPImmLeaf; + def fpimm32 : Operand, FPImmLeaf opc, string asm, string rhs_kind, RegisterOperand RegType, ValueType AccumType, ValueType InputType, SDPatternOperator OpNode> : - BaseSIMDIndexedTied { // idx = H:L:M bits<3> idx; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 9d901fd70446..30bd580ad86a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2228,7 +2228,6 @@ bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) { case AArch64::LDRWpre: case AArch64::LDURXi: case AArch64::LDRXpre: - case AArch64::LDRSWpre: case AArch64::LDURSWi: case AArch64::LDURHHi: case AArch64::LDURBBi: @@ -2438,7 +2437,6 @@ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) { case AArch64::LDURXi: case AArch64::LDRXpre: case AArch64::LDURSWi: - case AArch64::LDRSWpre: return true; } } @@ -2559,8 +2557,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { // Can't merge/pair if the instruction modifies the base register. // e.g., ldr x0, [x0] // This case will never occur with an FI base. - // However, if the instruction is an LDRpre or - // STRpre, it can be merged. + // However, if the instruction is an LDR/STRpre, it can be merged. // For example: // ldr q0, [x11, #32]! 
// ldr q1, [x11, #16] @@ -3137,7 +3134,6 @@ int AArch64InstrInfo::getMemScale(unsigned Opc) { case AArch64::LDRSpre: case AArch64::LDRSWui: case AArch64::LDURSWi: - case AArch64::LDRSWpre: case AArch64::LDRWpre: case AArch64::LDRWui: case AArch64::LDURWi: @@ -3193,7 +3189,6 @@ bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) { return false; case AArch64::LDRWpre: case AArch64::LDRXpre: - case AArch64::LDRSWpre: case AArch64::LDRSpre: case AArch64::LDRDpre: case AArch64::LDRQpre: @@ -5438,8 +5433,8 @@ static bool getFNEGPatterns(MachineInstr &Root, auto Match = [&](unsigned Opcode, MachineCombinerPattern Pattern) -> bool { MachineOperand &MO = Root.getOperand(1); MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg()); - if (MI != nullptr && MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) && - (MI->getOpcode() == Opcode) && + if (MI != nullptr && (MI->getOpcode() == Opcode) && + MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) && Root.getFlag(MachineInstr::MIFlag::FmContract) && Root.getFlag(MachineInstr::MIFlag::FmNsz) && MI->getFlag(MachineInstr::MIFlag::FmContract) && diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 3450ed29d142..9e72d37880c5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -262,6 +262,8 @@ def UseNegativeImmediates def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">; +def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">; + def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">; def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", @@ -2251,7 +2253,7 @@ def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; // Large STG to be expanded into a loop. $sz is the size, $Rn is start address. // $Rn_wback is one past the end of the range. $Rm is the loop counter. -let isCodeGenOnly=1, mayStore=1 in { +let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in { def STGloop_wback : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, @@ -4355,16 +4357,23 @@ def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, Sched<[WriteF]>; } + // Similarly add aliases def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, Requires<[HasFullFP16]>; def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; -// Pattern for FP16 immediates +def : Pat<(bf16 fpimm0), + (FMOVH0)>; + +// Pattern for FP16 and BF16 immediates let Predicates = [HasFullFP16] in { def : Pat<(f16 fpimm:$in), - (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; + (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; + + def : Pat<(bf16 fpimm:$in), + (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>; } //===----------------------------------------------------------------------===// @@ -4617,6 +4626,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm FMOV : FPMoveImmediate<"fmov">; } +let Predicates = [HasFullFP16] in { + def : Pat<(bf16 fpimmbf16:$in), + (FMOVHi (fpimm16XForm bf16:$in))>; +} + //===----------------------------------------------------------------------===// // Advanced SIMD two vector instructions. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 419b471db3a3..41af5522d967 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -293,8 +293,6 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, return AArch64::LDRWui; case AArch64::LDURSWi: return AArch64::LDURWi; - case AArch64::LDRSWpre: - return AArch64::LDRWpre; } } @@ -374,8 +372,6 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { case AArch64::LDRSWui: case AArch64::LDURSWi: return AArch64::LDPSWi; - case AArch64::LDRSWpre: - return AArch64::LDPSWpre; } } @@ -589,8 +585,6 @@ static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) { return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi); case AArch64::LDRXpre: return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi); - case AArch64::LDRSWpre: - return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi); } } @@ -1346,7 +1340,7 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, return false; // The STRpre - STRui and - // LDRpre-LDRui + // LDRpre-LDRui // are candidate pairs that can be merged. if (isPreLdStPairCandidate(FirstMI, MI)) return true; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index ad404e8dab2a..b4f02e0dd203 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -204,10 +204,18 @@ def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>; +def AArch64fadd_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3), + (AArch64fadd_p node:$op1, node:$op2, node:$op3), [{ + return N->getFlags().hasAllowContract(); +}]>; def AArch64fadd_p_nsz : PatFrag<(ops node:$op1, node:$op2, node:$op3), (AArch64fadd_p node:$op1, node:$op2, node:$op3), [{ return N->getFlags().hasNoSignedZeros(); }]>; +def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3), + (AArch64fsub_p node:$op1, node:$op2, node:$op3), [{ + return N->getFlags().hasAllowContract(); +}]>; def AArch64fsub_p_nsz : PatFrag<(ops node:$op1, node:$op2, node:$op3), (AArch64fsub_p node:$op1, node:$op2, node:$op3), [{ return N->getFlags().hasNoSignedZeros(); @@ -363,14 +371,12 @@ def AArch64fabd_p : PatFrags<(ops node:$pg, node:$op1, node:$op2), (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)]>; def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), - [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), - (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), node:$zn, node:$zm, node:$za), node:$za)]>; + [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za)]>; def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), [(int_aarch64_sve_fmls_u node:$pg, node:$za, node:$zn, node:$zm), (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za), - (AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za), - (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), (AArch64fneg_mt (AArch64ptrue 31), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>; + (AArch64fma_p node:$pg, node:$zm, 
(AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za)]>; def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), [(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm), @@ -423,18 +429,15 @@ def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3), [(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3), (xor node:$op1, (xor node:$op2, node:$op3))]>; -class fma_patfrags - : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), - [(intrinsic node:$pred, node:$op1, node:$op2, node:$op3), - (vselect node:$pred, (add (SVEAllActive), node:$op1, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)], -[{ - if (N->getOpcode() == ISD::VSELECT) - return N->getOperand(1)->getFlags().hasAllowContract(); - return true; // it's the intrinsic -}]>; +def AArch64fmla_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), + [(int_aarch64_sve_fmla node:$pg, node:$za, node:$zn, node:$zm), + (vselect node:$pg, (AArch64fadd_p_contract (SVEAllActive), node:$za, (AArch64fmul_p_oneuse (SVEAllActive), node:$zn, node:$zm)), node:$za), + (vselect node:$pg, (AArch64fma_p (SVEAllActive), node:$zn, node:$zm, node:$za), node:$za)]>; -def AArch64fmla_m1 : fma_patfrags; -def AArch64fmls_m1 : fma_patfrags; +def AArch64fmls_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), + [(int_aarch64_sve_fmls node:$pg, node:$za, node:$zn, node:$zm), + (vselect node:$pg, (AArch64fsub_p_contract (SVEAllActive), node:$za, (AArch64fmul_p_oneuse (SVEAllActive), node:$zn, node:$zm)), node:$za), + (vselect node:$pg, (AArch64fma_p (SVEAllActive), (AArch64fneg_mt (SVEAllActive), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>; def AArch64add_m1 : VSelectUnpredOrPassthruPatFrags; def AArch64sub_m1 : VSelectUnpredOrPassthruPatFrags; @@ -2352,13 +2355,15 @@ let Predicates = [HasSVEorSME] in { // LDR1 of 64-bit data defm : LD1RPat; - // LD1R of FP data - defm : LD1RPat; - defm : LD1RPat; - defm : LD1RPat; - defm : LD1RPat; - defm : LD1RPat; - defm : LD1RPat; + let Predicates = [HasSVEorSME, UseSVEFPLD1R] in { + // LD1R of FP data + defm : LD1RPat; + defm : LD1RPat; + defm : LD1RPat; + defm : LD1RPat; + defm : LD1RPat; + defm : LD1RPat; + } // LD1R of 128-bit masked data multiclass ld1rq_pat{ diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 9ab86684856e..5e20d16464c4 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -394,7 +394,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool useSVEForFixedLengthVectors() const { if (!isNeonAvailable()) - return true; + return hasSVE(); // Prefer NEON unless larger SVE registers are available. return hasSVE() && getMinSVEVectorSizeInBits() >= 256; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index a66d2ddee652..e78d8bb487a9 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -564,6 +564,11 @@ void AArch64CallLowering::saveVarArgRegisters( if (IsWin64CC) { GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -static_cast(GPRSaveSize), false); + if (GPRSaveSize & 15) + // The extra size here, if triggered, will always be 8. 
+ MFI.CreateFixedObject(16 - (GPRSaveSize & 15), + -static_cast(alignTo(GPRSaveSize, 16)), + false); } else GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 118862b8c317..4902ec3639ec 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2317,7 +2317,10 @@ multiclass sve_fp_3op_p_zds_a opc, string asm, string Ps, SVEPseudo2Instr, SVEInstr2Rev; def : SVE_4_Op_Pat(NAME # _H)>; + def : SVE_4_Op_Pat(NAME # _H)>; + def : SVE_4_Op_Pat(NAME # _H)>; def : SVE_4_Op_Pat(NAME # _S)>; + def : SVE_4_Op_Pat(NAME # _S)>; def : SVE_4_Op_Pat(NAME # _D)>; } @@ -7200,6 +7203,10 @@ multiclass sve_int_perm_cpy_v { def : Pat<(nxv8f16 (op nxv8i1:$pg, f16:$splat, nxv8f16:$passthru)), (!cast(NAME # _H) $passthru, $pg, $splat)>; + def : Pat<(nxv4f16 (op nxv4i1:$pg, f16:$splat, nxv4f16:$passthru)), + (!cast(NAME # _H) $passthru, $pg, $splat)>; + def : Pat<(nxv2f16 (op nxv2i1:$pg, f16:$splat, nxv2f16:$passthru)), + (!cast(NAME # _H) $passthru, $pg, $splat)>; def : Pat<(nxv2f32 (op nxv2i1:$pg, f32:$splat, nxv2f32:$passthru)), (!cast(NAME # _S) $passthru, $pg, $splat)>; def : Pat<(nxv4f32 (op nxv4i1:$pg, f32:$splat, nxv4f32:$passthru)), diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index b82db82de84e..c25194c02f72 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -41,7 +41,6 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIOptimizeExecMaskingPreRAPass(); FunctionPass *createSIOptimizeVGPRLiveRangePass(); FunctionPass *createSIFixSGPRCopiesPass(); -FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); FunctionPass *createSIPreAllocateWWMRegsPass(); @@ -145,9 +144,6 @@ extern char &SIFixSGPRCopiesID; void initializeSIFixVGPRCopiesPass(PassRegistry &); extern char &SIFixVGPRCopiesID; -void initializeSILowerWWMCopiesPass(PassRegistry &); -extern char &SILowerWWMCopiesID; - void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 254d02d4ce5b..39e00a037bdd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2528,7 +2528,7 @@ SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src, std::pair AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, SDValue Src, SDNodeFlags Flags) const { - if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags)) + if (!needsDenormHandlingF32(DAG, Src, Flags)) return {}; MVT VT = MVT::f32; @@ -2609,9 +2609,7 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags); } - SDValue Lowered = LowerFLOGUnsafe( - X, DL, DAG, IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2, - Flags); + SDValue Lowered = LowerFLOGUnsafe(X, DL, DAG, IsLog10, Flags); if (VT == MVT::f16 && !Subtarget->has16BitInsts()) { return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered, DAG.getTargetConstant(0, DL, MVT::i32), Flags); @@ -2696,11 +2694,36 @@ SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const { // Do f32 fast math expansion for flog2 or flog10. This is accurate enough for a // promote f16 operation. 
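The rewritten LowerFLOGUnsafe that follows folds the base conversion into the denormal-scaling path: when the f32 input had to be multiplied by 2^32 to leave the denormal range, the result is computed as log2(x * 2^32) * C - 32 * C, with C = log10(2) for log10 and ln(2) for the natural log, combined with an FMA when the subtarget has a fast one. A standalone numeric sketch of that identity; the denormal threshold and the use of std::log2 in place of the hardware log instruction are assumptions of the sketch.

#include <cmath>
#include <cstdio>

// log_b(x) built from a log2 primitive. Denormal inputs are first multiplied
// by 2^32 so log2 sees a normal value, and the known bias of 32 is folded
// into the final multiply-add together with the base-conversion constant C:
// log_b(x) = log2(x * 2^32) * C - 32 * C.
static float flogViaLog2(float X, bool IsLog10) {
  const float C = IsLog10 ? std::log10(2.0f) : std::log(2.0f);
  const bool NeedsScaling = X > 0.0f && X < 0x1.0p-126f; // below FLT_MIN
  const float In = NeedsScaling ? X * 0x1.0p32f : X;
  const float Log2 = std::log2(In); // stands in for the hardware log op
  const float Offset = NeedsScaling ? -32.0f * C : 0.0f;
  return std::fma(Log2, C, Offset);
}

int main() {
  std::printf("%g vs %g\n", flogViaLog2(1e-40f, true), std::log10(1e-40f));
  return 0;
}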
SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL, - SelectionDAG &DAG, - double Log2BaseInverted, + SelectionDAG &DAG, bool IsLog10, SDNodeFlags Flags) const { EVT VT = Src.getValueType(); unsigned LogOp = VT == MVT::f32 ? AMDGPUISD::LOG : ISD::FLOG2; + + double Log2BaseInverted = + IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2; + + if (VT == MVT::f32) { + auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags); + if (ScaledInput) { + SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags); + SDValue ScaledResultOffset = + DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT); + + SDValue Zero = DAG.getConstantFP(0.0f, SL, VT); + + SDValue ResultOffset = DAG.getNode(ISD::SELECT, SL, VT, IsScaled, + ScaledResultOffset, Zero, Flags); + + SDValue Log2Inv = DAG.getConstantFP(Log2BaseInverted, SL, VT); + + if (Subtarget->hasFastFMAF32()) + return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset, + Flags); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, LogSrc, Log2Inv, Flags); + return DAG.getNode(ISD::FADD, SL, VT, Mul, ResultOffset); + } + } + SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags); SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT); @@ -2728,7 +2751,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const { assert(VT == MVT::f32); - if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags)) + if (!needsDenormHandlingF32(DAG, Src, Flags)) return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags); // bool needs_scaling = x < -0x1.f80000p+6f; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 26b91155ba85..c39093b9bb6b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -72,7 +72,7 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, - double Log2BaseInverted, SDNodeFlags Flags) const; + bool IsLog10, SDNodeFlags Flags) const; SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 120c00b14a36..bbf4db12f5ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3037,8 +3037,7 @@ static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src, std::pair AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src, unsigned Flags) const { - if (allowApproxFunc(B.getMF(), Flags) || - !needsDenormHandlingF32(B.getMF(), Src, Flags)) + if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) return {}; const LLT F32 = LLT::scalar(32); @@ -3132,16 +3131,13 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) || TM.Options.ApproxFuncFPMath || TM.Options.UnsafeFPMath) { - const double Log2BaseInv = - IsLog10 ? 
numbers::ln2 / numbers::ln10 : numbers::ln2; - if (Ty == F16 && !ST.has16BitInsts()) { Register LogVal = MRI.createGenericVirtualRegister(F32); auto PromoteSrc = B.buildFPExt(F32, X); - legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), Log2BaseInv, Flags); + legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), IsLog10, Flags); B.buildFPTrunc(Dst, LogVal); } else { - legalizeFlogUnsafe(B, Dst, X, Log2BaseInv, Flags); + legalizeFlogUnsafe(B, Dst, X, IsLog10, Flags); } MI.eraseFromParent(); @@ -3225,10 +3221,36 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI, } bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, - Register Src, - double Log2BaseInverted, + Register Src, bool IsLog10, unsigned Flags) const { + const double Log2BaseInverted = + IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2; + LLT Ty = B.getMRI()->getType(Dst); + + if (Ty == LLT::scalar(32)) { + auto [ScaledInput, IsScaled] = getScaledLogInput(B, Src, Flags); + if (ScaledInput) { + auto LogSrc = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false) + .addUse(Src) + .setMIFlags(Flags); + auto ScaledResultOffset = B.buildFConstant(Ty, -32.0 * Log2BaseInverted); + auto Zero = B.buildFConstant(Ty, 0.0); + auto ResultOffset = + B.buildSelect(Ty, IsScaled, ScaledResultOffset, Zero, Flags); + auto Log2Inv = B.buildFConstant(Ty, Log2BaseInverted); + + if (ST.hasFastFMAF32()) + B.buildFMA(Dst, LogSrc, Log2Inv, ResultOffset, Flags); + else { + auto Mul = B.buildFMul(Ty, LogSrc, Log2Inv, Flags); + B.buildFAdd(Dst, Mul, ResultOffset, Flags); + } + + return true; + } + } + auto Log2Operand = Ty == LLT::scalar(16) ? B.buildFLog2(Ty, Src, Flags) : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false) @@ -3264,11 +3286,10 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI, assert(Ty == F32); - if (allowApproxFunc(B.getMF(), Flags) || - !needsDenormHandlingF32(B.getMF(), Src, Flags)) { + if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) { B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef{Dst}, false) - .addUse(Src) - .setMIFlags(Flags); + .addUse(Src) + .setMIFlags(Flags); MI.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 04773f275c87..534bb2c87ea3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -85,7 +85,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeFlog2(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFlogCommon(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src, - double Log2BaseInverted, unsigned Flags) const; + bool IsLog10, unsigned Flags) const; bool legalizeFExp2(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, Register Src, unsigned Flags) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 1d69f0434b58..17025867c1da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -386,6 +386,8 @@ static Value *promoteAllocaUserToVector( }; Type *VecEltTy = VectorTy->getElementType(); + const unsigned NumVecElts = VectorTy->getNumElements(); + switch (Inst->getOpcode()) { case Instruction::Load: { // Loads can only be lowered if the value is known. @@ -413,13 +415,13 @@ static Value *promoteAllocaUserToVector( // Loading a subvector. 
if (isa(AccessTy)) { assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy))); - const unsigned NumElts = AccessSize / DL.getTypeStoreSize(VecEltTy); - auto *SubVecTy = FixedVectorType::get(VecEltTy, NumElts); + const unsigned NumLoadedElts = AccessSize / DL.getTypeStoreSize(VecEltTy); + auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts); assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy)); unsigned IndexVal = cast(Index)->getZExtValue(); Value *SubVec = PoisonValue::get(SubVecTy); - for (unsigned K = 0; K < NumElts; ++K) { + for (unsigned K = 0; K < NumLoadedElts; ++K) { SubVec = Builder.CreateInsertElement( SubVec, Builder.CreateExtractElement(CurVal, IndexVal + K), K); } @@ -465,8 +467,9 @@ static Value *promoteAllocaUserToVector( // Storing a subvector. if (isa(AccessTy)) { assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy))); - const unsigned NumElts = AccessSize / DL.getTypeStoreSize(VecEltTy); - auto *SubVecTy = FixedVectorType::get(VecEltTy, NumElts); + const unsigned NumWrittenElts = + AccessSize / DL.getTypeStoreSize(VecEltTy); + auto *SubVecTy = FixedVectorType::get(VecEltTy, NumWrittenElts); assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy)); if (SubVecTy->isPtrOrPtrVectorTy()) @@ -478,7 +481,8 @@ static Value *promoteAllocaUserToVector( unsigned IndexVal = cast(Index)->getZExtValue(); Value *CurVec = GetOrLoadCurrentVectorValue(); - for (unsigned K = 0; (IndexVal + K) < NumElts; ++K) { + for (unsigned K = 0; K < NumWrittenElts && ((IndexVal + K) < NumVecElts); + ++K) { CurVec = Builder.CreateInsertElement( CurVec, Builder.CreateExtractElement(Val, K), IndexVal + K); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index f90c8e4bdddd..87ef2333e2ea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -364,7 +364,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPUDAGToDAGISelPass(*PR); initializeGCNDPPCombinePass(*PR); initializeSILowerI1CopiesPass(*PR); - initializeSILowerWWMCopiesPass(*PR); initializeSILowerSGPRSpillsPass(*PR); initializeSIFixSGPRCopiesPass(*PR); initializeSIFixVGPRCopiesPass(*PR); @@ -1297,7 +1296,6 @@ void GCNPassConfig::addOptimizedRegAlloc() { } bool GCNPassConfig::addPreRewrite() { - addPass(&SILowerWWMCopiesID); if (EnableRegReassign) addPass(&GCNNSAReassignID); return true; @@ -1352,8 +1350,6 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { addPass(&SILowerSGPRSpillsID); addPass(createVGPRAllocPass(false)); - - addPass(&SILowerWWMCopiesID); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 865caae240f3..903e726c667d 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -99,10 +99,10 @@ static void getVGPRSpillLaneOrTempRegister( SGPR, PrologEpilogSGPRSaveRestoreInfo( SGPRSaveKind::SPILL_TO_VGPR_LANE, FI)); - LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front(); - dbgs() << printReg(SGPR, TRI) << " requires fallback spill to " - << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane - << '\n';); + LLVM_DEBUG( + auto Spill = MFI->getPrologEpilogSGPRSpillToVGPRLanes(FI).front(); + dbgs() << printReg(SGPR, TRI) << " requires fallback spill to " + << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); } else { // Remove dead index MF.getFrameInfo().RemoveStackObject(FI); 
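In the AMDGPUPromoteAlloca hunk above, the subvector-store loop is now bounded both by the number of elements being written and by the length of the promoted vector, so a store that starts near the end of the alloca can no longer write past the last element. A standalone sketch of that clamped insert over plain arrays.

#include <array>
#include <cstdio>

// Write the elements of Sub into Vec starting at Index, but never past the
// end of Vec: the loop is bounded by both the subvector length and the
// promoted vector length.
template <size_t N, size_t M>
static void insertSubVector(std::array<float, N> &Vec,
                            const std::array<float, M> &Sub, size_t Index) {
  for (size_t K = 0; K < M && Index + K < N; ++K)
    Vec[Index + K] = Sub[K];
}

int main() {
  std::array<float, 4> Vec{};
  std::array<float, 3> Sub{1, 2, 3};
  insertSubVector(Vec, Sub, 2); // only two slots remain; the third is dropped
  for (float F : Vec)
    std::printf("%g ", F); // 0 0 1 2
  std::printf("\n");
  return 0;
}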
@@ -264,7 +264,7 @@ class PrologEpilogSGPRSpillBuilder { assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); ArrayRef Spill = - FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); + FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI); assert(Spill.size() == NumSubRegs); for (unsigned I = 0; I < NumSubRegs; ++I) { @@ -309,7 +309,7 @@ class PrologEpilogSGPRSpillBuilder { void restoreFromVGPRLane(const int FI) { assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); ArrayRef Spill = - FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); + FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI); assert(Spill.size() == NumSubRegs); for (unsigned I = 0; I < NumSubRegs; ++I) { @@ -1353,8 +1353,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg))) { assert(RS != nullptr); - RS->enterBasicBlockEnd(MBB); - RS->backward(MI); + // FIXME: change to enterBasicBlockEnd() + RS->enterBasicBlock(MBB); TRI->eliminateFrameIndex(MI, 0, FIOp, RS); SpillFIs.set(FI); continue; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3148f49ff0d5..b7b90e23e895 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -278,10 +278,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, case ISD::UNDEF: case ISD::EXTRACT_VECTOR_ELT: case ISD::INSERT_VECTOR_ELT: - case ISD::EXTRACT_SUBVECTOR: case ISD::SCALAR_TO_VECTOR: case ISD::IS_FPCLASS: break; + case ISD::EXTRACT_SUBVECTOR: case ISD::INSERT_SUBVECTOR: case ISD::CONCAT_VECTORS: setOperationAction(Op, VT, Custom); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 4b0283b27a6f..a74b917f82bf 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -382,6 +382,8 @@ class SIInsertWaitcnts : public MachineFunctionPass { bool ForceEmitZeroWaitcnts; bool ForceEmitWaitcnt[NUM_INST_CNTS]; + bool OptNone; + // S_ENDPGM instructions before which we should insert a DEALLOC_VGPRS // message. DenseSet ReleaseVGPRInsts; @@ -1040,7 +1042,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // do this if there are no outstanding scratch stores. 
else if (MI.getOpcode() == AMDGPU::S_ENDPGM || MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { - if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && + if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && !OptNone && ScoreBrackets.getScoreRange(VS_CNT) != 0 && !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS)) ReleaseVGPRInsts.insert(&MI); @@ -1822,6 +1824,9 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { for (auto T : inst_counter_types()) ForceEmitWaitcnt[T] = false; + OptNone = MF.getFunction().hasOptNone() || + MF.getTarget().getOptLevel() == CodeGenOpt::None; + HardwareLimits Limits = {}; Limits.VmcntMax = AMDGPU::getVmcntBitMask(IV); Limits.ExpcntMax = AMDGPU::getExpcntBitMask(IV); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 278cf2b69ee3..0f954732a5ee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2414,14 +2414,6 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { return std::pair(Split[0], Split[1]); } -std::optional -SIInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { - if (MI.getOpcode() == AMDGPU::WWM_COPY) - return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; - - return std::nullopt; -} - bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, @@ -3088,7 +3080,6 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) { case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B64: case AMDGPU::COPY: - case AMDGPU::WWM_COPY: case AMDGPU::V_ACCVGPR_WRITE_B32_e64: case AMDGPU::V_ACCVGPR_READ_B32_e64: case AMDGPU::V_ACCVGPR_MOV_B32: @@ -4978,8 +4969,7 @@ void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, - bool IsSCCLive, - SlotIndexes *Indexes) const { + bool IsSCCLive) const { const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); bool IsWave32 = ST.isWave32(); @@ -4989,34 +4979,23 @@ void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF, // the single instruction S_OR_SAVEEXEC that clobbers SCC. unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - auto StoreExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg) - .addReg(Exec, RegState::Kill); - auto FlipExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); - if (Indexes) { - Indexes->insertMachineInstrInMaps(*StoreExecMI); - Indexes->insertMachineInstrInMaps(*FlipExecMI); - } + BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg).addReg(Exec, RegState::Kill); + BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1); } else { const unsigned OrSaveExec = IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1); SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. - if (Indexes) - Indexes->insertMachineInstrInMaps(*SaveExec); } } void SIInstrInfo::restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, Register Reg, - SlotIndexes *Indexes) const { + const DebugLoc &DL, Register Reg) const { unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; MCRegister Exec = isWave32() ? 
AMDGPU::EXEC_LO : AMDGPU::EXEC; - auto ExecRestoreMI = - BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill); - if (Indexes) - Indexes->insertMachineInstrInMaps(*ExecRestoreMI); + BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill); } static const TargetRegisterClass * @@ -8001,16 +7980,6 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const { return ArrayRef(TargetFlags); } -unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg, - const MachineFunction &MF) const { - const SIMachineFunctionInfo *MFI = MF.getInfo(); - assert(SrcReg.isVirtual()); - if (MFI->checkFlag(SrcReg, AMDGPU::VirtRegFlag::WWM_REG)) - return AMDGPU::WWM_COPY; - - return AMDGPU::COPY; -} - bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); @@ -8562,7 +8531,7 @@ MachineInstr *SIInstrInfo::foldMemoryOperandImpl( // A similar issue also exists with spilling and reloading $exec registers. // // To prevent that, constrain the %0 register class here. - if (isFullCopyInstr(MI)) { + if (MI.isFullCopy()) { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); if ((DstReg.isVirtual() || SrcReg.isVirtual()) && @@ -8659,7 +8628,7 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const { if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32) return InstructionUniformity::AlwaysUniform; - if (isCopyInstr(MI)) { + if (MI.isCopy()) { const MachineOperand &srcOp = MI.getOperand(1); if (srcOp.isReg() && srcOp.getReg().isPhysical()) { const TargetRegisterClass *regClass = diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index b25aae7b2fb0..66f93e5640d6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -170,12 +170,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; protected: - /// If the specific machine instruction is a instruction that moves/copies - /// value from one register to another register return destination and source - /// registers as machine operands. 
- std::optional - isCopyInstrImpl(const MachineInstr &MI) const override; - bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const; @@ -833,7 +827,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { } bool isVGPRCopy(const MachineInstr &MI) const { - assert(isCopyInstr(MI)); + assert(MI.isCopy()); Register Dest = MI.getOperand(0).getReg(); const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -903,7 +897,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { if (OpIdx >= MI.getDesc().NumOperands) return false; - if (isCopyInstr(MI)) { + if (MI.isCopy()) { unsigned Size = getOpSize(MI, OpIdx); assert(Size == 8 || Size == 4); @@ -952,12 +946,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, Register Reg, bool IsSCCLive, - SlotIndexes *Indexes = nullptr) const; + const DebugLoc &DL, Register Reg, + bool IsSCCLive) const; void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - Register Reg, SlotIndexes *Indexes = nullptr) const; + Register Reg) const; /// Return the correct register class for \p OpNo. For target-specific /// instructions, this will return the register class that has been defined @@ -1149,9 +1143,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override; - unsigned getLiveRangeSplitOpcode(Register Reg, - const MachineFunction &MF) const override; - bool isBasicBlockPrologue(const MachineInstr &MI) const override; MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 7fe76b4c13ca..2edebccef7d8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -172,13 +172,6 @@ def STRICT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] -def WWM_COPY : SPseudoInstSI < - (outs unknown:$dst), (ins unknown:$src)> { - let hasSideEffects = 0; - let isAsCheapAsAMove = 1; - let isConvergent = 1; -} - def ENTER_STRICT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> { let Uses = [EXEC]; let Defs = [EXEC, SCC]; diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 47d28d5d0eab..d21107c02ef7 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -50,9 +50,7 @@ class SILowerSGPRSpills : public MachineFunctionPass { SILowerSGPRSpills() : MachineFunctionPass(ID) {} void calculateSaveRestoreBlocks(MachineFunction &MF); - bool spillCalleeSavedRegs(MachineFunction &MF, - SmallVectorImpl &CalleeSavedFIs); - void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS); + bool spillCalleeSavedRegs(MachineFunction &MF); bool runOnMachineFunction(MachineFunction &MF) override; @@ -60,13 +58,6 @@ class SILowerSGPRSpills : public MachineFunctionPass { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - - MachineFunctionProperties getClearedProperties() const override { - // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs. 
- return MachineFunctionProperties() - .set(MachineFunctionProperties::Property::IsSSA) - .set(MachineFunctionProperties::Property::NoVRegs); - } }; } // end anonymous namespace @@ -206,8 +197,7 @@ static void updateLiveness(MachineFunction &MF, ArrayRef CSI) { EntryBB.sortUniqueLiveIns(); } -bool SILowerSGPRSpills::spillCalleeSavedRegs( - MachineFunction &MF, SmallVectorImpl &CalleeSavedFIs) { +bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); const GCNSubtarget &ST = MF.getSubtarget(); @@ -238,7 +228,6 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs( TRI->getSpillAlign(*RC), true); CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); - CalleeSavedFIs.push_back(JunkFI); } } @@ -259,50 +248,6 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs( return false; } -void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF, - LiveIntervals *LIS) { - // TODO: This is a workaround to avoid the unmodelled liveness computed with - // whole-wave virtual registers when allocated together with the regular VGPR - // virtual registers. Presently, the liveness computed during the regalloc is - // only uniform (or single lane aware) and it doesn't take account of the - // divergent control flow that exists for our GPUs. Since the WWM registers - // can modify inactive lanes, the wave-aware liveness should be computed for - // the virtual registers to accurately plot their interferences. Without - // having the divergent CFG for the function, it is difficult to implement the - // wave-aware liveness info. Until then, we conservatively extend the liveness - // of the wwm registers into the entire function so that they won't be reused - // without first spilling/splitting their liveranges. - SIMachineFunctionInfo *MFI = MF.getInfo(); - - // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks. - for (auto Reg : MFI->getSGPRSpillVGPRs()) { - for (MachineBasicBlock *SaveBlock : SaveBlocks) { - MachineBasicBlock::iterator InsertBefore = SaveBlock->begin(); - auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(), - TII->get(AMDGPU::IMPLICIT_DEF), Reg); - MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG); - if (LIS) { - LIS->InsertMachineInstrInMaps(*MIB); - } - } - } - - // Insert the KILL in the return blocks to extend their liveness untill the - // end of function. Insert a separate KILL for each VGPR. - for (MachineBasicBlock *RestoreBlock : RestoreBlocks) { - MachineBasicBlock::iterator InsertBefore = - RestoreBlock->getFirstTerminator(); - for (auto Reg : MFI->getSGPRSpillVGPRs()) { - auto MIB = - BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(), - TII->get(TargetOpcode::KILL)); - MIB.addReg(Reg); - if (LIS) - LIS->InsertMachineInstrInMaps(*MIB); - } - } -} - bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); @@ -316,8 +261,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { // First, expose any CSR SGPR spills. This is mostly the same as what PEI // does, but somewhat simpler. 
calculateSaveRestoreBlocks(MF); - SmallVector CalleeSavedFIs; - bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs); + bool HasCSRs = spillCalleeSavedRegs(MF); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -331,7 +275,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; bool NewReservedRegs = false; - bool SpilledToVirtVGPRLanes = false; // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be // handled as SpilledToReg in regular PrologEpilogInserter. @@ -354,53 +297,23 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); - - bool IsCalleeSaveSGPRSpill = - std::find(CalleeSavedFIs.begin(), CalleeSavedFIs.end(), FI) != - CalleeSavedFIs.end(); - if (IsCalleeSaveSGPRSpill) { - // Spill callee-saved SGPRs into physical VGPR lanes. - - // TODO: This is to ensure the CFIs are static for efficient frame - // unwinding in the debugger. Spilling them into virtual VGPR lanes - // involve regalloc to allocate the physical VGPRs and that might - // cause intermediate spill/split of such liveranges for successful - // allocation. This would result in broken CFI encoding unless the - // regalloc aware CFI generation to insert new CFIs along with the - // intermediate spills is implemented. There is no such support - // currently exist in the LLVM compiler. - if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) { - NewReservedRegs = true; - bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( - MI, FI, nullptr, Indexes, LIS, true); - if (!Spilled) - llvm_unreachable( - "failed to spill SGPR to physical VGPR lane when allocated"); - } - } else { - if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) { - bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( - MI, FI, nullptr, Indexes, LIS); - if (!Spilled) - llvm_unreachable( - "failed to spill SGPR to virtual VGPR lane when allocated"); - SpillFIs.set(FI); - SpilledToVirtVGPRLanes = true; - } + if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) { + NewReservedRegs = true; + bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( + MI, FI, nullptr, Indexes, LIS); + (void)Spilled; + assert(Spilled && "failed to spill SGPR to VGPR when allocated"); + SpillFIs.set(FI); } } } - if (SpilledToVirtVGPRLanes) { - extendWWMVirtRegLiveness(MF, LIS); - if (LIS) { - // Compute the LiveInterval for the newly created virtual registers. - for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) - LIS->createAndComputeVirtRegInterval(Reg); - } - } - + // FIXME: Adding to live-ins redundant with reserving registers. for (MachineBasicBlock &MBB : MF) { + for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) + MBB.addLiveIn(Reg); + MBB.sortUniqueLiveIns(); + // FIXME: The dead frame indices are replaced with a null register from // the debug value instructions. We should instead, update it with the // correct register value. But not sure the register value alone is @@ -421,10 +334,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { // lane". FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false); - MadeChange = true; - } - - if (SpilledToVirtVGPRLanes) { const TargetRegisterClass *RC = TRI->getWaveMaskRegClass(); // Shift back the reserved SGPR for EXEC copy into the lowest range. 
// This SGPR is reserved to handle the whole-wave spill/copy operations @@ -433,21 +342,20 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) < TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy())) FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR); + + MadeChange = true; } else { - // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM - // spills/copies. Reset the SGPR reserved for EXEC copy. + // No SGPR spills and hence there won't be any WWM spills/copies. Reset the + // SGPR reserved for EXEC copy. FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister); } SaveBlocks.clear(); RestoreBlocks.clear(); - // Updated the reserved registers with any physical VGPRs added for SGPR - // spills. - if (NewReservedRegs) { - for (Register Reg : FuncInfo->getWWMReservedRegs()) - MRI.reserveReg(Reg, TRI); - } + // Updated the reserved registers with any VGPRs added for SGPR spills. + if (NewReservedRegs) + MRI.freezeReservedRegs(MF); return MadeChange; } diff --git a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp deleted file mode 100644 index 9c3cd1bbd6b0..000000000000 --- a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Lowering the WWM_COPY instructions for various register classes. -/// AMDGPU target generates WWM_COPY instruction to differentiate WWM -/// copy from COPY. This pass generates the necessary exec mask manipulation -/// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to -/// COPY. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "GCNSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "SIMachineFunctionInfo.h" -#include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/InitializePasses.h" - -using namespace llvm; - -#define DEBUG_TYPE "si-lower-wwm-copies" - -namespace { - -class SILowerWWMCopies : public MachineFunctionPass { -public: - static char ID; - - SILowerWWMCopies() : MachineFunctionPass(ID) { - initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - StringRef getPassName() const override { return "SI Lower WWM Copies"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - -private: - bool isSCCLiveAtMI(const MachineInstr &MI); - void addToWWMSpills(MachineFunction &MF, Register Reg); - - LiveIntervals *LIS; - SlotIndexes *Indexes; - VirtRegMap *VRM; - const SIRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - SIMachineFunctionInfo *MFI; -}; - -} // End anonymous namespace. 
- -INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", - false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false, - false) - -char SILowerWWMCopies::ID = 0; - -char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID; - -bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { - // We can't determine the liveness info if LIS isn't available. Early return - // in that case and always assume SCC is live. - if (!LIS) - return true; - - LiveRange &LR = - LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI)); - SlotIndex Idx = LIS->getInstructionIndex(MI); - return LR.liveAt(Idx); -} - -// If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills -// for preserving its entire lanes at function prolog/epilog. -void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { - if (Reg.isPhysical()) - return; - - Register PhysReg = VRM->getPhys(Reg); - assert(PhysReg != VirtRegMap::NO_PHYS_REG && - "should have allocated a physical register"); - - MFI->allocateWWMSpill(MF, PhysReg); -} - -bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) { - const GCNSubtarget &ST = MF.getSubtarget(); - const SIInstrInfo *TII = ST.getInstrInfo(); - - MFI = MF.getInfo(); - LIS = getAnalysisIfAvailable(); - Indexes = getAnalysisIfAvailable(); - VRM = getAnalysisIfAvailable(); - TRI = ST.getRegisterInfo(); - MRI = &MF.getRegInfo(); - - if (!MFI->hasVRegFlags()) - return false; - - bool Changed = false; - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (MI.getOpcode() != AMDGPU::WWM_COPY) - continue; - - // TODO: Club adjacent WWM ops between same exec save/restore - assert(TII->isVGPRCopy(MI)); - - // For WWM vector copies, manipulate the exec mask around the copy - // instruction. - const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock::iterator InsertPt = MI.getIterator(); - Register RegForExecCopy = MFI->getSGPRForEXECCopy(); - TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy, - isSCCLiveAtMI(MI), Indexes); - TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes); - addToWWMSpills(MF, MI.getOperand(0).getReg()); - LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); - - // Lower WWM_COPY back to COPY - MI.setDesc(TII->get(AMDGPU::COPY)); - Changed |= true; - } - } - - return Changed; -} diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 219464eac9ec..c9376d0ea653 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -314,23 +314,37 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, return false; } -bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills( - MachineFunction &MF, int FI, unsigned LaneIndex) { +bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF, + int FI, + unsigned LaneIndex) { + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); Register LaneVGPR; if (!LaneIndex) { - LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); + if (LaneVGPR == AMDGPU::NoRegister) { + // We have no VGPRs left for spilling SGPRs. Reset because we will not + // partially spill the SGPR to VGPRs. 
+ SGPRSpillToVGPRLanes.erase(FI); + return false; + } + SpillVGPRs.push_back(LaneVGPR); + // Add this register as live-in to all blocks to avoid machine verifier + // complaining about use of an undefined physical register. + for (MachineBasicBlock &BB : MF) + BB.addLiveIn(LaneVGPR); } else { LaneVGPR = SpillVGPRs.back(); } - SGPRSpillsToVirtualVGPRLanes[FI].push_back( + SGPRSpillToVGPRLanes[FI].push_back( SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); return true; } -bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills( +bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills( MachineFunction &MF, int FI, unsigned LaneIndex) { const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); @@ -341,21 +355,16 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills( if (LaneVGPR == AMDGPU::NoRegister) { // We have no VGPRs left for spilling SGPRs. Reset because we will not // partially spill the SGPR to VGPRs. - SGPRSpillsToPhysicalVGPRLanes.erase(FI); + PrologEpilogSGPRSpillToVGPRLanes.erase(FI); return false; } allocateWWMSpill(MF, LaneVGPR); - reserveWWMRegister(LaneVGPR); - for (MachineBasicBlock &MBB : MF) { - MBB.addLiveIn(LaneVGPR); - MBB.sortUniqueLiveIns(); - } } else { - LaneVGPR = WWMReservedRegs.back(); + LaneVGPR = WWMSpills.back().first; } - SGPRSpillsToPhysicalVGPRLanes[FI].push_back( + PrologEpilogSGPRSpillToVGPRLanes[FI].push_back( SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); return true; } @@ -364,8 +373,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool IsPrologEpilog) { std::vector &SpillLanes = - IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI] - : SGPRSpillsToVirtualVGPRLanes[FI]; + IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI] + : SGPRSpillToVGPRLanes[FI]; // This has already been allocated. if (!SpillLanes.empty()) @@ -386,14 +395,15 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF, "not spilling SGPRs to VGPRs"); unsigned &NumSpillLanes = - IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes; + IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes; for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) { unsigned LaneIndex = (NumSpillLanes % WaveSize); - bool Allocated = IsPrologEpilog - ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex) - : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex); + bool Allocated = + IsPrologEpilog + ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex) + : allocateVGPRForSGPRSpills(MF, FI, LaneIndex); if (!Allocated) { NumSpillLanes -= I; return false; @@ -474,25 +484,16 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, bool SIMachineFunctionInfo::removeDeadFrameIndices( MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { - // Remove dead frame indices from function frame, however keep FP & BP since - // spills for them haven't been inserted yet. And also make sure to remove the - // frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure, - // otherwise, it could result in an unexpected side effect and bug, in case of - // any re-mapping of freed frame indices by later pass(es) like "stack slot + // Remove dead frame indices from function frame. And also make sure to remove + // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it + // could result in an unexpected side effect and bug, in case of any + // re-mapping of freed frame indices by later pass(es) like "stack slot // coloring". 
- for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) { + for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) { MFI.RemoveStackObject(R.first); - SGPRSpillsToVirtualVGPRLanes.erase(R.first); + SGPRSpillToVGPRLanes.erase(R.first); } - // Remove the dead frame indices of CSR SGPRs which are spilled to physical - // VGPR lanes during SILowerSGPRSpills pass. - if (!ResetSGPRSpillStackIDs) { - for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) { - MFI.RemoveStackObject(R.first); - SGPRSpillsToPhysicalVGPRLanes.erase(R.first); - } - } bool HaveSGPRToMemory = false; if (ResetSGPRSpillStackIDs) { diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 37572d30dff6..3b4747adf125 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -496,16 +496,15 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, }; private: - // To track virtual VGPR + lane index for each subregister of the SGPR spilled - // to frameindex key during SILowerSGPRSpills pass. + // To track VGPR + lane index for each subregister of the SGPR spilled to + // frameindex key during SILowerSGPRSpills pass. + DenseMap> SGPRSpillToVGPRLanes; + // To track VGPR + lane index for spilling special SGPRs like Frame Pointer + // identified during PrologEpilogInserter. DenseMap> - SGPRSpillsToVirtualVGPRLanes; - // To track physical VGPR + lane index for CSR SGPR spills and special SGPRs - // like Frame Pointer identified during PrologEpilogInserter. - DenseMap> - SGPRSpillsToPhysicalVGPRLanes; - unsigned NumVirtualVGPRSpillLanes = 0; - unsigned NumPhysicalVGPRSpillLanes = 0; + PrologEpilogSGPRSpillToVGPRLanes; + unsigned NumVGPRSpillLanes = 0; + unsigned NumVGPRPrologEpilogSpillLanes = 0; SmallVector SpillVGPRs; using WWMSpillsMap = MapVector; // To track the registers used in instructions that can potentially modify the @@ -549,10 +548,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, private: Register VGPRForAGPRCopy; - bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI, - unsigned LaneIndex); - bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI, - unsigned LaneIndex); + bool allocateVGPRForSGPRSpills(MachineFunction &MF, int FI, + unsigned LaneIndex); + bool allocateVGPRForPrologEpilogSGPRSpills(MachineFunction &MF, int FI, + unsigned LaneIndex); public: Register getVGPRForAGPRCopy() const { @@ -584,9 +583,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, SIModeRegisterDefaults getMode() const { return Mode; } ArrayRef - getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const { - auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex); - return (I == SGPRSpillsToVirtualVGPRLanes.end()) + getSGPRSpillToVGPRLanes(int FrameIndex) const { + auto I = SGPRSpillToVGPRLanes.find(FrameIndex); + return (I == SGPRSpillToVGPRLanes.end()) ? ArrayRef() : ArrayRef(I->second); } @@ -648,9 +647,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, } ArrayRef - getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const { - auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex); - return (I == SGPRSpillsToPhysicalVGPRLanes.end()) + getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const { + auto I = PrologEpilogSGPRSpillToVGPRLanes.find(FrameIndex); + return (I == PrologEpilogSGPRSpillToVGPRLanes.end()) ? 
ArrayRef() : ArrayRef(I->second); } @@ -668,8 +667,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag; } - bool hasVRegFlags() { return VRegFlags.size(); } - void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4, Align Alignment = Align(4)); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 1d50dff4a7d9..c2a272166241 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -712,6 +712,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs()) reserveRegisterTuples(Reserved, Reg); + for (auto Reg : MFI->getSGPRSpillVGPRs()) + reserveRegisterTuples(Reserved, Reg); + return Reserved; } @@ -1733,13 +1736,10 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, SlotIndexes *Indexes, - LiveIntervals *LIS, bool OnlyToVGPR, - bool SpillToPhysVGPRLane) const { + LiveIntervals *LIS, bool OnlyToVGPR) const { SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); - ArrayRef VGPRSpills = - SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index) - : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index); + ArrayRef VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; @@ -1856,13 +1856,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, SlotIndexes *Indexes, - LiveIntervals *LIS, bool OnlyToVGPR, - bool SpillToPhysVGPRLane) const { + LiveIntervals *LIS, bool OnlyToVGPR) const { SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); - ArrayRef VGPRSpills = - SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index) - : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index); + ArrayRef VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; @@ -2008,7 +2005,7 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, /// handled. 
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, - SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const { + SlotIndexes *Indexes, LiveIntervals *LIS) const { switch (MI->getOpcode()) { case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: @@ -2024,7 +2021,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: - return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane); + return spillSGPR(MI, FI, RS, Indexes, LIS, true); case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S384_RESTORE: @@ -2039,7 +2036,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S96_RESTORE: case AMDGPU::SI_SPILL_S64_RESTORE: case AMDGPU::SI_SPILL_S32_RESTORE: - return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane); + return restoreSGPR(MI, FI, RS, Indexes, LIS, true); default: llvm_unreachable("not an SGPR spill instruction"); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 17fce43891c5..2120b47c581e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -142,17 +142,14 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill = true) const; - /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a - /// free VGPR lane to spill. + /// If \p OnlyToVGPR is true, this will only succeed if this bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, - bool OnlyToVGPR = false, - bool SpillToPhysVGPRLane = false) const; + bool OnlyToVGPR = false) const; bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, - bool OnlyToVGPR = false, - bool SpillToPhysVGPRLane = false) const; + bool OnlyToVGPR = false) const; bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, @@ -166,10 +163,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { unsigned FIOperandNum, RegScavenger *RS) const override; - bool eliminateSGPRToVGPRSpillFrameIndex( - MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, - SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, - bool SpillToPhysVGPRLane = false) const; + bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, + int FI, RegScavenger *RS, + SlotIndexes *Indexes = nullptr, + LiveIntervals *LIS = nullptr) const; StringRef getRegAsmName(MCRegister Reg) const override; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index c2d7d605fbc2..444ee2efb6d2 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -205,6 +205,8 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)), ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0x10000))}; II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M)); + II.setMetadata(LLVMContext::MD_noundef, + MDNode::get(II.getContext(), std::nullopt)); return &II; } break; diff --git 
a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index dae323ec24fb..8c642f61019a 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1672,9 +1672,9 @@ getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, // FIXME: Needs fixup support. unsigned Value = 0; - int32_t tmp = (int32_t)MO1.getImm(); - if (tmp < 0) - tmp = abs(tmp); + auto tmp = static_cast(MO1.getImm()); + if (static_cast(tmp) < 0) + tmp = -tmp; else Value |= 256; // Set the ADD bit Value |= tmp & 255; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index 088195994edd..67574403ca83 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -207,8 +207,32 @@ void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI, decltype(End) NextI; for (auto I = Begin; I != End; I = NextI) { NextI = std::next(I); - if (doSrcRegProp) + if (doSrcRegProp) { + // In situations like below it is not known if usage is a kill + // after setReg(): + // + // .-> %2:gpr = LD_imm64 @"llvm.t:0:0$0:0" + // | + // |`----------------. + // | %3:gpr = LDD %2:gpr, 0 + // | %4:gpr = ADD_rr %0:gpr(tied-def 0), killed %3:gpr <--- (1) + // | %5:gpr = LDD killed %4:gpr, 0 ^^^^^^^^^^^^^ + // | STD killed %5:gpr, %1:gpr, 0 this is I + // `----------------. + // %6:gpr = LDD %2:gpr, 0 + // %7:gpr = ADD_rr %0:gpr(tied-def 0), killed %6:gpr <--- (2) + // %8:gpr = LDD killed %7:gpr, 0 ^^^^^^^^^^^^^ + // STD killed %8:gpr, %1:gpr, 0 this is I + // + // Instructions (1) and (2) would be updated by setReg() to: + // + // ADD_rr %0:gpr(tied-def 0), %2:gpr + // + // %2:gpr is not killed at (1), so it is necessary to remove kill flag + // from I. I->setReg(SrcReg); + I->setIsKill(false); + } // The candidate needs to have a unique definition. if (IsAma && MRI->getUniqueVRegDef(I->getReg())) diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 485ba88a4654..3c1422b0e1a2 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -1368,6 +1368,8 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) { // been generated, construct one based on function signature. 
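The ARMMCCodeEmitter hunk above replaces abs() on a signed immediate with negation through an unsigned temporary (the template argument of the static_cast is elided in this listing; uint32_t is assumed). A minimal standalone sketch of why that matters, assuming a 32-bit immediate:

// Why the emitter avoids abs() on a signed value: abs(INT32_MIN) overflows
// int, which is undefined behavior, while negating the same bits as uint32_t
// is well defined (modular) and yields the magnitude the encoder needs.
#include <cstdint>
#include <cstdio>

uint32_t magnitudeBits(int64_t Imm) {
  auto Tmp = static_cast<uint32_t>(Imm);
  if (static_cast<int32_t>(Tmp) < 0)
    Tmp = -Tmp; // 0x80000000 negates to itself, no UB
  return Tmp;
}

int main() {
  std::printf("%u\n", static_cast<unsigned>(magnitudeBits(-255)));       // 255
  std::printf("%#x\n", static_cast<unsigned>(magnitudeBits(INT32_MIN))); // 0x80000000
}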
if (LineInfoGenerated == false) { auto *S = MI->getMF()->getFunction().getSubprogram(); + if (!S) + return; MCSymbol *FuncLabel = Asm->getFunctionBegin(); constructLineInfo(S, FuncLabel, S->getLine(), 0); LineInfoGenerated = true; diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 56fdf19a0720..4f93cdaaa137 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1544,24 +1544,58 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) { Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64())); // Check whether this is a TLS call expression - bool TLSCall = false; - if (const MCSymbolRefExpr *Ref = dyn_cast(EVal)) - TLSCall = Ref->getSymbol().getName() == "__tls_get_addr"; + const char TlsGetAddr[] = "__tls_get_addr"; + bool TlsCall = false; + const MCExpr *TlsCallAddend = nullptr; + if (auto *Ref = dyn_cast(EVal)) { + TlsCall = Ref->getSymbol().getName() == TlsGetAddr; + } else if (auto *Bin = dyn_cast(EVal); + Bin && Bin->getOpcode() == MCBinaryExpr::Add) { + if (auto *Ref = dyn_cast(Bin->getLHS())) { + TlsCall = Ref->getSymbol().getName() == TlsGetAddr; + TlsCallAddend = Bin->getRHS(); + } + } - if (TLSCall && parseOptionalToken(AsmToken::LParen)) { + if (TlsCall && parseOptionalToken(AsmToken::LParen)) { const MCExpr *TLSSym; - S = Parser.getTok().getLoc(); + const SMLoc S2 = Parser.getTok().getLoc(); if (ParseExpression(TLSSym)) - return Error(S, "invalid TLS call expression"); + return Error(S2, "invalid TLS call expression"); + E = Parser.getTok().getLoc(); if (parseToken(AsmToken::RParen, "expected ')'")) return true; - E = Parser.getTok().getLoc(); + // PPC32 allows bl __tls_get_addr[+a](x@tlsgd)@plt+b. Parse "@plt[+b]". + if (!isPPC64() && parseOptionalToken(AsmToken::At)) { + AsmToken Tok = getTok(); + if (!(parseOptionalToken(AsmToken::Identifier) && + Tok.getString().compare_insensitive("plt") == 0)) + return Error(Tok.getLoc(), "expected 'plt'"); + EVal = MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_PLT, + getContext()); + if (parseOptionalToken(AsmToken::Plus)) { + const MCExpr *Addend = nullptr; + SMLoc EndLoc; + if (parsePrimaryExpr(Addend, EndLoc)) + return true; + if (TlsCallAddend) // __tls_get_addr+a(x@tlsgd)@plt+b + TlsCallAddend = + MCBinaryExpr::createAdd(TlsCallAddend, Addend, getContext()); + else // __tls_get_addr(x@tlsgd)@plt+b + TlsCallAddend = Addend; + } + if (TlsCallAddend) + EVal = MCBinaryExpr::createAdd(EVal, TlsCallAddend, getContext()); + // Add a __tls_get_addr operand with addend a, b, or a+b. + Operands.back() = PPCOperand::CreateFromMCExpr( + EVal, S, Parser.getTok().getLoc(), false); + } Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64())); } // Otherwise, check for D-form memory operands - if (!TLSCall && parseOptionalToken(AsmToken::LParen)) { + if (!TlsCall && parseOptionalToken(AsmToken::LParen)) { S = Parser.getTok().getLoc(); int64_t IntVal; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index dbdfb6e906bb..13480da4e731 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -564,10 +564,10 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, // come at the _end_ of the expression. 
const MCOperand &Op = MI->getOperand(OpNo); const MCSymbolRefExpr *RefExp = nullptr; - const MCConstantExpr *ConstExp = nullptr; + const MCExpr *Rhs = nullptr; if (const MCBinaryExpr *BinExpr = dyn_cast(Op.getExpr())) { RefExp = cast(BinExpr->getLHS()); - ConstExp = cast(BinExpr->getRHS()); + Rhs = BinExpr->getRHS(); } else RefExp = cast(Op.getExpr()); @@ -584,8 +584,14 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, if (RefExp->getKind() != MCSymbolRefExpr::VK_None && RefExp->getKind() != MCSymbolRefExpr::VK_PPC_NOTOC) O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind()); - if (ConstExp != nullptr) - O << '+' << ConstExp->getValue(); + if (Rhs) { + SmallString<0> Buf; + raw_svector_ostream Tmp(Buf); + Rhs->print(Tmp, &MAI); + if (isdigit(Buf[0])) + O << '+'; + O << Buf; + } } /// showRegistersWithPercentPrefix - Check if this register name should be diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 0ebfc007b3d7..96fd83ab6a7b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -7633,20 +7633,6 @@ void PPCDAGToDAGISel::PeepholePPC64() { case PPC::ADDItocL: Flags = PPCII::MO_TOC_LO; break; - case PPC::ADDItoc: - case PPC::ADDItoc8: - if (RequiresMod4Offset) { - if (GlobalAddressSDNode *GA = - dyn_cast(Base.getOperand(0))) { - const GlobalValue *GV = GA->getGlobal(); - Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); - // XMC_TD global that is underaligned being accessed with a DS form - // instruction. - if (Alignment < 4) - continue; - } - } - break; } SDValue ImmOpnd = Base.getOperand(1); @@ -7741,27 +7727,12 @@ void PPCDAGToDAGISel::PeepholePPC64() { } } - const unsigned BaseOpcode = Base.getMachineOpcode(); - // ADDItoc and ADDItoc8 are pseudos used exclusively by AIX small code - // model when a global is defined in the TOC. 
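The PPCInstPrinter hunk above inserts '+' before the addend only when its printed form starts with a digit, so a negative addend keeps its own '-' sign. A tiny sketch of that formatting rule, using a hypothetical helper rather than the printer API:

// "sym" plus addend text: prepend '+' only for a leading digit, so "-8"
// prints as "sym-8" rather than "sym+-8".
#include <cctype>
#include <iostream>
#include <string>

std::string printWithAddend(const std::string &Sym, const std::string &Rhs) {
  std::string Out = Sym;
  if (!Rhs.empty() && std::isdigit(static_cast<unsigned char>(Rhs[0])))
    Out += '+';
  return Out + Rhs;
}

int main() {
  std::cout << printWithAddend("__tls_get_addr", "32") << "\n"; // __tls_get_addr+32
  std::cout << printWithAddend("__tls_get_addr", "-8") << "\n"; // __tls_get_addr-8
}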
- const bool OpcodeIsAIXTocData = - BaseOpcode == PPC::ADDItoc || BaseOpcode == PPC::ADDItoc8; - if (FirstOp == 1) // Store - if (OpcodeIsAIXTocData) - (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), - Base.getOperand(0), Base.getOperand(1), - N->getOperand(3)); - else - (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, - Base.getOperand(0), N->getOperand(3)); + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, + Base.getOperand(0), N->getOperand(3)); else // Load - if (OpcodeIsAIXTocData) - (void)CurDAG->UpdateNodeOperands(N, Base.getOperand(0), - Base.getOperand(1), N->getOperand(2)); - else - (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), - N->getOperand(2)); + (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), + N->getOperand(2)); if (UpdateHBase) (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 0081c0f5295a..224c7b281ac4 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -461,6 +461,12 @@ class XForm_tlb xo, dag OOL, dag IOL, string asmstr, let RST = 0; } +class XForm_tlbilx xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> { + bits<5> T; + let RST = T; +} + class XForm_attn opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 616f4e48cfb8..2992f78aa38a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -4317,6 +4317,9 @@ def TLBSX : XForm_tlb<914, (outs), (ins gprc:$RA, gprc:$RB), "tlbsx $RA, $RB", def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$RA, gprc:$RB), "tlbivax $RA, $RB", IIC_LdStLoad>, Requires<[IsBookE]>; +def TLBILX : XForm_tlbilx<18, (outs), (ins u2imm:$T, gprc:$RA, gprc:$RB), + "tlbilx $T, $RA, $RB", IIC_LdStLoad>, Requires<[IsBookE]>; + def TLBRE : XForm_24_eieio<31, 946, (outs), (ins), "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>; @@ -4680,6 +4683,12 @@ def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>, def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>, Requires<[IsPPC4xx]>; +def : InstAlias<"tlbilxlpid", (TLBILX 0, R0, R0)>, Requires<[IsBookE]>; +def : InstAlias<"tlbilxpid", (TLBILX 1, R0, R0)>, Requires<[IsBookE]>; +def : InstAlias<"tlbilxva $RA, $RB", (TLBILX 3, gprc:$RA, gprc:$RB)>, + Requires<[IsBookE]>; +def : InstAlias<"tlbilxva $RB", (TLBILX 3, R0, gprc:$RB)>, Requires<[IsBookE]>; + def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>; def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm", diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 1f7dba66db35..976effb96adc 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -42,10 +42,6 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); } else { const GlobalValue *GV = MO.getGlobal(); - if (const GlobalVariable *GVar = dyn_cast(GV)) - if (GVar->hasAttribute("toc-data")) - return TM.getSymbol(GV); - TM.getNameWithPrefix(Name, GV, Mang); } diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index e9f4daa62de3..b763191d980e 100644 --- 
a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -42,7 +42,7 @@ def P9Model : SchedMachineModel { // Power 9, or MMA, or paired vector mem ops, or PC relative mem ops, or // instructions introduced after ISA 3.0. let UnsupportedFeatures = [HasSPE, PrefixInstrs, MMA, - PairedVectorMemops, + PairedVectorMemops, IsBookE, PCRelativeMemops, IsISA3_1, IsISAFuture]; } diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index f7d11e921c7d..d2520d932ddf 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -225,16 +225,23 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, const MachineOperand &AddrReg = MI->getOperand(OpNo); assert(MI->getNumOperands() > OpNo + 1 && "Expected additional operand"); - const MachineOperand &DispImm = MI->getOperand(OpNo + 1); + const MachineOperand &Offset = MI->getOperand(OpNo + 1); // All memory operands should have a register and an immediate operand (see // RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand). if (!AddrReg.isReg()) return true; - if (!DispImm.isImm()) + if (!Offset.isImm() && !Offset.isGlobal()) return true; - OS << DispImm.getImm() << "(" - << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")"; + MCOperand MCO; + if (!lowerOperand(Offset, MCO)) + return true; + + if (Offset.isImm()) + OS << MCO.getImm(); + else if (Offset.isGlobal()) + OS << *MCO.getExpr(); + OS << "(" << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")"; return false; } diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index 59f1e8319ae7..d10bba26023f 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -572,6 +572,15 @@ bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB, if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) && !(BNEOp0 == CmpValReg && BNEOp1 == DestReg)) return false; + + // Make sure the branch is the only user of the AND. + if (MaskReg.isValid()) { + if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill()) + return false; + if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill()) + return false; + } + ToErase.push_back(&*MBBI); LoopHeadBNETarget = MBBI->getOperand(2).getMBB(); MBBI = skipDebugInstructionsForward(std::next(MBBI), E); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index ca2d9474d1ed..f312cc8129dd 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -262,22 +262,16 @@ static unsigned getPushPopEncoding(const Register MaxReg) { // Get the max reg of Push/Pop for restoring callee saved registers. 
static Register getMaxPushPopReg(const MachineFunction &MF, - const std::vector &CSI, - unsigned &PushPopRegs) { + const std::vector &CSI) { Register MaxPushPopReg = RISCV::NoRegister; - PushPopRegs = 0; for (auto &CS : CSI) { Register Reg = CS.getReg(); - if (RISCV::PGPRRegClass.contains(Reg)) { + if (RISCV::PGPRRegClass.contains(Reg)) MaxPushPopReg = std::max(MaxPushPopReg.id(), Reg.id()); - PushPopRegs += 1; - } } // if rlist is {rs, s0-s10}, then s11 will also be included - if (MaxPushPopReg == RISCV::X26) { + if (MaxPushPopReg == RISCV::X26) MaxPushPopReg = RISCV::X27; - PushPopRegs = 13; - } return MaxPushPopReg; } @@ -581,11 +575,18 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, int64_t Offset; // Offsets for objects with fixed locations (IE: those saved by libcall) are // simply calculated from the frame index. - if (FrameIdx < 0) - Offset = FrameIdx * (int64_t) STI.getXLen() / 8; - else + if (FrameIdx < 0) { + if (RVFI->isPushable(MF)) { + // Callee-saved register stored by Zcmp push is in reverse order. + Offset = -(FrameIdx + RVFI->getRVPushRegs() + 1) * + (int64_t)STI.getXLen() / 8; + } else { + Offset = FrameIdx * (int64_t)STI.getXLen() / 8; + } + } else { Offset = MFI.getObjectOffset(Entry.getFrameIdx()) - RVFI->getLibCallStackSize(); + } Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, RI->getDwarfRegNum(Reg, true), Offset)); @@ -771,7 +772,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, if (FirstSPAdjustAmount) StackSize = FirstSPAdjustAmount; - if (RVFI->isPushable(MF) && MBBI->getOpcode() == RISCV::CM_POP) { + if (RVFI->isPushable(MF) && MBBI != MBB.end() && + MBBI->getOpcode() == RISCV::CM_POP) { // Use available stack adjustment in pop instruction to deallocate stack // space. unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8); @@ -1325,10 +1327,11 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( // Emit CM.PUSH with base SPimm & evaluate Push stack RISCVMachineFunctionInfo *RVFI = MF->getInfo(); if (RVFI->isPushable(*MF)) { - unsigned PushPopRegs = 0; - Register MaxReg = getMaxPushPopReg(*MF, CSI, PushPopRegs); - RVFI->setRVPushRegs(PushPopRegs); - RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushPopRegs, 16)); + Register MaxReg = getMaxPushPopReg(*MF, CSI); + unsigned PushedRegNum = + getPushPopEncoding(MaxReg) - llvm::RISCVZC::RLISTENCODE::RA + 1; + RVFI->setRVPushRegs(PushedRegNum); + RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16)); if (MaxReg != RISCV::NoRegister) { // Use encoded number to represent registers to spill. @@ -1340,7 +1343,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( PushBuilder.addImm((int64_t)RegEnc); PushBuilder.addImm(0); - for (unsigned i = 0; i < PushPopRegs; i++) + for (unsigned i = 0; i < PushedRegNum; i++) PushBuilder.addUse(AllPopRegs[i], RegState::Implicit); } } else if (const char *SpillLibCall = getSpillLibCallName(*MF, CSI)) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index cafce628cf6a..aa20409da4e2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3414,6 +3414,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { // Because N and True must have the same merge operand (or True's operand is // implicit_def), the "effective" body is the minimum of their VLs. 
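For the Zcmp path in RISCVFrameLowering, the new branch computes CFI offsets for registers saved by cm.push in reverse slot order. Plugging sample numbers into the formula from the hunk (the FrameIdx/register pairing here is assumed purely for illustration):

// cm.push saving three registers on RV32: XLen/8 == 4, RVPushRegs == 3.
// Offset = -(FrameIdx + RVPushRegs + 1) * XLen/8 reverses the slot order
// relative to the plain FrameIdx * XLen/8 rule used for libcall saves.
#include <cstdint>
#include <cstdio>

int64_t pushOffset(int FrameIdx, unsigned PushRegs, unsigned XLenBytes) {
  return -(FrameIdx + static_cast<int>(PushRegs) + 1) *
         static_cast<int64_t>(XLenBytes);
}

int main() {
  for (int FrameIdx = -1; FrameIdx >= -3; --FrameIdx)
    std::printf("FrameIdx %d -> offset %lld\n", FrameIdx,
                static_cast<long long>(pushOffset(FrameIdx, 3, 4)));
  // FrameIdx -1 -> -12, FrameIdx -2 -> -8, FrameIdx -3 -> -4
}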
+ SDValue OrigVL = VL; VL = GetMinVL(TrueVL, VL); if (!VL) return false; @@ -3461,7 +3462,17 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { "Expected instructions with mask have a tied dest."); #endif - uint64_t Policy = isImplicitDef(Merge) ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0; + // Use a tumu policy, relaxing it to tail agnostic provided that the merge + // operand is undefined. + // + // However, if the VL became smaller than what the vmerge had originally, then + // elements past VL that were previously in the vmerge's body will have moved + // to the tail. In that case we always need to use tail undisturbed to + // preserve them. + bool MergeVLShrunk = VL != OrigVL; + uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk) + ? RISCVII::TAIL_AGNOSTIC + : /*TUMU*/ 0; SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f49c5011607f..f030982cb815 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3113,12 +3113,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, bool Negate = false; int64_t SplatStepVal = StepNumerator; unsigned StepOpcode = ISD::MUL; - if (StepNumerator != 1) { - if (isPowerOf2_64(std::abs(StepNumerator))) { - Negate = StepNumerator < 0; - StepOpcode = ISD::SHL; - SplatStepVal = Log2_64(std::abs(StepNumerator)); - } + // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it + // anyway as the shift of 63 won't fit in uimm5. + if (StepNumerator != 1 && StepNumerator != INT64_MIN && + isPowerOf2_64(std::abs(StepNumerator))) { + Negate = StepNumerator < 0; + StepOpcode = ISD::SHL; + SplatStepVal = Log2_64(std::abs(StepNumerator)); } // Only emit VIDs with suitably-small steps/addends. We use imm5 is a @@ -5368,9 +5369,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, if (isa(RHS)) { int64_t Imm = cast(RHS)->getSExtValue(); if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) { - // X > -1 should have been replaced with false. - assert((CCVal != ISD::SETUGT || Imm != -1) && - "Missing canonicalization"); + // If this is an unsigned compare and the constant is -1, incrementing + // the constant would change behavior. The result should be false. + if (CCVal == ISD::SETUGT && Imm == -1) + return DAG.getConstant(0, DL, VT); // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. CCVal = ISD::getSetCCSwappedOperands(CCVal); SDValue SetCC = DAG.getSetCC( @@ -11710,7 +11712,11 @@ static SDValue performFP_TO_INTCombine(SDNode *N, return SDValue(); RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); - if (FRM == RISCVFPRndMode::Invalid) + // If the result is invalid, we didn't find a foldable instruction. + // If the result is dynamic, then we found an frint which we don't yet + // support. It will cause 7 to be written to the FRM CSR for vector. + // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below. 
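The RISCVISelLowering SETCC hunk above folds an unsigned '>' against -1 to false instead of asserting it was already canonicalized away. A minimal sketch of the wraparound it guards against, assuming 64-bit operands:

// x >u C is always false when C is the all-ones value, but the generic
// rewrite to x >=u (C + 1) wraps C + 1 to 0 and becomes always true.
#include <cstdint>
#include <cstdio>

bool originalCompare(uint64_t X, uint64_t C) { return X > C; }
bool naiveRewrite(uint64_t X, uint64_t C) { return X >= C + 1; }

int main() {
  const uint64_t C = UINT64_MAX; // the immediate -1, viewed unsigned
  for (uint64_t X : {UINT64_C(0), UINT64_C(42), UINT64_MAX})
    std::printf("X=%llu  original=%d  rewritten=%d\n",
                static_cast<unsigned long long>(X),
                originalCompare(X, C), naiveRewrite(X, C));
  // original is 0 for every X; the naive rewrite is 1 for every X.
}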
+ if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN) return SDValue(); SDLoc DL(N); diff --git a/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp index a93e750eadc6..f885adca669f 100644 --- a/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp @@ -132,7 +132,8 @@ bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) { for (auto &MBB : Fn) { MachineBasicBlock::iterator MBBI = containsPop(MBB); MachineBasicBlock::iterator NextI = next_nodbg(MBBI, MBB.end()); - if (MBBI != MBB.end() && NextI->getOpcode() == RISCV::PseudoRET) + if (MBBI != MBB.end() && NextI != MBB.end() && + NextI->getOpcode() == RISCV::PseudoRET) Modified |= usePopRet(MBBI, NextI, adjustRetVal(MBBI)); } return Modified; diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td index 3d602e7e4376..9af8b17edcc5 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -1743,6 +1743,22 @@ let hasSideEffects = 1 in { } } +// Section A.11 - DONE and RETRY +// Section A.47 - SAVED and RESTORED +let Predicates = [HasV9], rs1 = 0, rs2 = 0 in { + let rd = 0 in + def DONE : F3_1<2, 0b111110, (outs), (ins), "done", []>; + + let rd = 1 in + def RETRY : F3_1<2, 0b111110, (outs), (ins), "retry", []>; + + let rd = 0 in + def SAVED : F3_1<2, 0b110001, (outs), (ins), "saved", []>; + + let rd = 1 in + def RESTORED : F3_1<2, 0b110001, (outs), (ins), "restored", []>; +} + // Section A.42 - Prefetch Data let Predicates = [HasV9] in { def PREFETCHr : F3_1<3, 0b101101, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 821efc1b758b..abac7a9bfe0a 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -1152,6 +1152,11 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } } + // Type legalization (via getNumberOfParts) can't handle structs + if (TLI->getValueType(DL, Src, true) == MVT::Other) + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); + unsigned NumOps = (Src->isVectorTy() ? 
getNumVectorRegs(Src) : getNumberOfParts(Src)); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 0f677b8a4afc..05cc50712c52 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -659,6 +659,13 @@ def TuningFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">; +def TuningPreferNoGather + : SubtargetFeature<"prefer-no-gather", "PreferGather", "false", + "Prefer no gather instructions">; +def TuningPreferNoScatter + : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false", + "Prefer no scatter instructions">; + def TuningPrefer128Bit : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true", "Prefer 128-bit AVX instructions">; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c14d51bb4fa5..d9750ea22e2b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1648,7 +1648,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); } - for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) { + for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) { setOperationAction(ISD::FP_EXTEND, VT, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom); } @@ -1656,9 +1656,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32); setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); } - - setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); } // This block controls legalization of the mask vector sizes that are @@ -1975,8 +1972,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setF16Action(MVT::v32f16, Expand); setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Custom); for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) { setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32); @@ -2197,9 +2194,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom); @@ -2249,9 +2246,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); + 
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE @@ -2275,8 +2272,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.useSoftFloat() && (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) { - addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass); - addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass); + addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass + : &X86::VR128RegClass); + addRegisterClass(MVT::v16bf16, Subtarget.hasAVX512() ? &X86::VR256XRegClass + : &X86::VR256RegClass); // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't // provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT. // Set the operation action Custom to do the customization later. @@ -2291,6 +2290,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } + setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom); addLegalFPImmediate(APFloat::getZero(APFloat::BFloat())); } @@ -2302,6 +2302,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMUL, MVT::v32bf16, Expand); setOperationAction(ISD::FDIV, MVT::v32bf16, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom); } @@ -11363,7 +11364,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); - if (VT.getVectorElementType() == MVT::bf16 && Subtarget.hasBF16()) + if (VT.getVectorElementType() == MVT::bf16 && + (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) return LowerBUILD_VECTORvXbf16(Op, DAG, Subtarget); if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) @@ -14795,13 +14797,9 @@ static bool isShuffleFoldableLoad(SDValue V) { } template -static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) { - return VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16(); -} - -template -bool X86TargetLowering::isSoftFP16(T VT) const { - return ::isSoftFP16(VT, Subtarget); +static bool isSoftF16(T VT, const X86Subtarget &Subtarget) { + T EltVT = VT.getScalarType(); + return EltVT == MVT::bf16 || (EltVT == MVT::f16 && !Subtarget.hasFP16()); } /// Try to lower insertion of a single element into a zero vector. 
@@ -14817,7 +14815,7 @@ static SDValue lowerShuffleAsElementInsertion( unsigned NumElts = VT.getVectorNumElements(); unsigned EltBits = VT.getScalarSizeInBits(); - if (isSoftFP16(EltVT, Subtarget)) + if (isSoftF16(EltVT, Subtarget)) return SDValue(); int V2Index = @@ -20374,7 +20372,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); - if (isSoftFP16(VT)) { + if (isSoftF16(VT, Subtarget)) { MVT NVT = VT.changeVectorElementTypeToInteger(); return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond, DAG.getBitcast(NVT, LHS), @@ -21852,7 +21850,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - if (isSoftFP16(VT)) + if (isSoftF16(VT, Subtarget)) return promoteXINT_TO_FP(Op, DAG); else if (isLegalConversion(SrcVT, true, Subtarget)) return Op; @@ -22357,7 +22355,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (DstVT == MVT::f128) return SDValue(); - if (isSoftFP16(DstVT)) + if (isSoftF16(DstVT, Subtarget)) return promoteXINT_TO_FP(Op, DAG); else if (isLegalConversion(SrcVT, false, Subtarget)) return Op; @@ -23314,7 +23312,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Res; - if (isSoftFP16(SrcVT)) { + if (isSoftF16(SrcVT, Subtarget)) { MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; if (IsStrict) return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, @@ -23743,7 +23741,7 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const { // This code is only for floats and doubles. Fall back to generic code for // anything else. - if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftFP16(SrcVT)) + if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftF16(SrcVT, Subtarget)) return SDValue(); EVT SatVT = cast(Node->getOperand(1))->getVT(); @@ -23888,6 +23886,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { !Subtarget.getTargetTriple().isOSDarwin())) return SDValue(); + if ((SVT == MVT::v8f16 && Subtarget.hasF16C()) || + (SVT == MVT::v16f16 && Subtarget.useAVX512Regs())) + return Op; + if (SVT == MVT::f16) { if (Subtarget.hasFP16()) return Op; @@ -23960,7 +23962,25 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { if (!SVT.isVector()) return Op; + if (SVT.getVectorElementType() == MVT::bf16) { + // FIXME: Do we need to support strict FP? 
+ assert(!IsStrict && "Strict FP doesn't support BF16"); + if (VT.getVectorElementType() == MVT::f64) { + MVT TmpVT = VT.changeVectorElementType(MVT::f32); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, + DAG.getNode(ISD::FP_EXTEND, DL, TmpVT, In)); + } + assert(VT.getVectorElementType() == MVT::f32 && "Unexpected fpext"); + MVT NVT = SVT.changeVectorElementType(MVT::i32); + In = DAG.getBitcast(SVT.changeTypeToInteger(), In); + In = DAG.getNode(ISD::ZERO_EXTEND, DL, NVT, In); + In = DAG.getNode(ISD::SHL, DL, NVT, In, DAG.getConstant(16, DL, NVT)); + return DAG.getBitcast(VT, In); + } + if (SVT.getVectorElementType() == MVT::f16) { + if (Subtarget.hasFP16() && isTypeLegal(SVT)) + return Op; assert(Subtarget.hasF16C() && "Unexpected features!"); if (SVT == MVT::v2f16) In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In, @@ -24033,6 +24053,12 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { return Res; } + if (VT.getScalarType() == MVT::bf16) { + if (SVT.getScalarType() == MVT::f32 && isTypeLegal(VT)) + return Op; + return SDValue(); + } + if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) { if (!Subtarget.hasF16C() || SVT.getScalarType() != MVT::f32) return SDValue(); @@ -25676,7 +25702,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, if (isFP) { MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64); - if (isSoftFP16(EltVT, Subtarget)) + if (isSoftF16(EltVT, Subtarget)) return SDValue(); bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; @@ -26241,7 +26267,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(IsStrict ? 3 : 2))->get(); - if (isSoftFP16(Op0.getValueType())) + if (isSoftF16(Op0.getValueType(), Subtarget)) return SDValue(); // Handle f128 first, since one possible outcome is a normal integer @@ -26434,7 +26460,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op1.getSimpleValueType(); SDValue CC; - if (isSoftFP16(VT)) { + if (isSoftF16(VT, Subtarget)) { MVT NVT = VT.changeTypeToInteger(); return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond, DAG.getBitcast(NVT, Op1), @@ -26506,7 +26532,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (Cond.getOpcode() == ISD::SETCC && - !isSoftFP16(Cond.getOperand(0).getSimpleValueType())) { + !isSoftF16(Cond.getOperand(0).getSimpleValueType(), Subtarget)) { if (SDValue NewCond = LowerSETCC(Cond, DAG)) { Cond = NewCond; // If the condition was updated, it's possible that the operands of the @@ -27196,7 +27222,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // Bail out when we don't have native compare instructions. if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0).getValueType() != MVT::f128 && - !isSoftFP16(Cond.getOperand(0).getValueType())) { + !isSoftF16(Cond.getOperand(0).getValueType(), Subtarget)) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -34983,7 +35009,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT SrcVT = Src.getValueType(); SDValue Res; - if (isSoftFP16(SrcVT)) { + if (isSoftF16(SrcVT, Subtarget)) { EVT NVT = VT.isVector() ? 
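The bf16 branch of LowerFP_EXTEND above widens by zero-extending the bf16 bits to i32, shifting left by 16, and bitcasting, because bf16 is exactly the upper half of an IEEE-754 binary32. A scalar sketch of the same bit manipulation:

// Extending bf16 to f32: place the 16 bf16 bits into the high half of a
// 32-bit word (zero-extend + shl 16), then reinterpret the bits as float.
#include <cstdint>
#include <cstring>
#include <cstdio>

float bf16ToFloat(uint16_t Bits) {
  uint32_t Widened = static_cast<uint32_t>(Bits) << 16; // zero-extend + shl 16
  float F;
  std::memcpy(&F, &Widened, sizeof(F));                 // bitcast to f32
  return F;
}

int main() {
  std::printf("%f\n", bf16ToFloat(0x3F80)); // 1.0
  std::printf("%f\n", bf16ToFloat(0x4000)); // 2.0
  std::printf("%f\n", bf16ToFloat(0xC040)); // -3.0
}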
VT.changeVectorElementType(MVT::f32) : MVT::f32; if (IsStrict) { Res = @@ -47383,7 +47409,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // ignored in unsafe-math mode). // We also try to create v2f32 min/max nodes, which we later widen to v4f32. if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && VT != MVT::f128 && !isSoftFP16(VT, Subtarget) && + VT != MVT::f80 && VT != MVT::f128 && !isSoftF16(VT, Subtarget) && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget.hasSSE2() || (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) { @@ -47700,7 +47726,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } // Early exit check - if (!TLI.isTypeLegal(VT) || isSoftFP16(VT, Subtarget)) + if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget)) return SDValue(); if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) @@ -54550,7 +54576,7 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) { static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); - if (Subtarget.useSoftFloat() || isSoftFP16(VT, Subtarget)) + if (Subtarget.useSoftFloat() || isSoftF16(VT, Subtarget)) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 250df82a30c2..047d8f021047 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1749,8 +1749,6 @@ namespace llvm { bool needsCmpXchgNb(Type *MemType) const; - template bool isSoftFP16(T VT) const; - void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ecb5c3e91240..b5dac7a0c65a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12976,6 +12976,11 @@ let Predicates = [HasBF16, HasVLX] in { def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))), (VPBROADCASTWZ256rr VR128X:$src)>; + def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))), + (VCVTNEPS2BF16Z256rr VR256X:$src)>; + def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))), + (VCVTNEPS2BF16Z256rm addr:$src)>; + // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. } @@ -12985,6 +12990,11 @@ let Predicates = [HasBF16] in { def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))), (VPBROADCASTWZrr VR128X:$src)>; + + def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))), + (VCVTNEPS2BF16Zrr VR512:$src)>; + def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))), + (VCVTNEPS2BF16Zrm addr:$src)>; // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 
} diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 6c57eceab376..a6fcc804e1d0 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8289,6 +8289,11 @@ let Predicates = [HasAVXNECONVERT] in { f256mem>, T8PS; let checkVEXPredicate = 1 in defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix; + + def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))), + (VCVTNEPS2BF16Yrr VR256:$src)>; + def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))), + (VCVTNEPS2BF16Yrm addr:$src)>; } def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}", diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 17981b3b9374..129a2646dbb7 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5944,9 +5944,7 @@ bool X86TTIImpl::forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) { (ST->hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX()))); } -bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) { - if (!supportsGather()) - return false; +bool X86TTIImpl::isLegalMaskedGatherScatter(Type *DataTy, Align Alignment) { Type *ScalarTy = DataTy->getScalarType(); if (ScalarTy->isPointerTy()) return true; @@ -5961,6 +5959,12 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) { return IntWidth == 32 || IntWidth == 64; } +bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) { + if (!supportsGather() || !ST->preferGather()) + return false; + return isLegalMaskedGatherScatter(DataTy, Alignment); +} + bool X86TTIImpl::isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const { @@ -5996,9 +6000,9 @@ bool X86TTIImpl::isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) { // AVX2 doesn't support scatter - if (!ST->hasAVX512()) + if (!ST->hasAVX512() || !ST->preferScatter()) return false; - return isLegalMaskedGather(DataType, Alignment); + return isLegalMaskedGatherScatter(DataType, Alignment); } bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 89c7916260a4..0fa0d240a548 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -261,6 +261,7 @@ class X86TTIImpl : public BasicTTIImplBase { bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) { return forceScalarizeMaskedGather(VTy, Alignment); } + bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment); bool isLegalMaskedGather(Type *DataType, Align Alignment); bool isLegalMaskedScatter(Type *DataType, Align Alignment); bool isLegalMaskedExpandLoad(Type *DataType); diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 518c859b11cc..81309280a44b 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1241,8 +1241,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512VP2INTERSECT); + // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't + // return all 0s for invalid subleaves so check the limit. 
bool HasLeaf7Subleaf1 = - MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + HasLeaf7 && EAX >= 1 && + !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512BF16); @@ -1750,8 +1753,11 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save; Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; + // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't + // return all 0s for invalid subleaves so check the limit. bool HasLeaf7Subleaf1 = - MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + HasLeaf7 && EAX >= 1 && + !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1); Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1); Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1); diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp index 72781513ff12..772d24c5ce3d 100644 --- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -16,9 +16,6 @@ using namespace llvm; using namespace llvm::LoongArch; -StringRef Arch; -StringRef TuneCPU; - const FeatureInfo AllFeatures[] = { #define LOONGARCH_FEATURE(NAME, KIND) {NAME, KIND}, #include "llvm/TargetParser/LoongArchTargetParser.def" @@ -50,11 +47,9 @@ bool LoongArch::getArchFeatures(StringRef Arch, return false; } -bool LoongArch::isValidTuneCPUName(StringRef TuneCPU) { - return isValidArchName(TuneCPU); -} +bool LoongArch::isValidCPUName(StringRef Name) { return isValidArchName(Name); } -void LoongArch::fillValidTuneCPUList(SmallVectorImpl &Values) { +void LoongArch::fillValidCPUList(SmallVectorImpl &Values) { for (const auto A : AllArchs) Values.emplace_back(A.Name); } @@ -63,11 +58,3 @@ StringRef LoongArch::getDefaultArch(bool Is64Bit) { // TODO: use a real 32-bit arch name. return Is64Bit ? "loongarch64" : ""; } - -void LoongArch::setArch(StringRef Name) { Arch = Name; } - -StringRef LoongArch::getArch() { return Arch; } - -void LoongArch::setTuneCPU(StringRef Name) { TuneCPU = Name; } - -StringRef LoongArch::getTuneCPU() { return TuneCPU; } diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 34c8a380448e..503ce019dc84 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -29,7 +28,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) { return true; } +/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. 
This avoids +/// pessimistic codegen that has to account for setting errno and can enable +/// vectorization. +static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, + TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree &DT) { + // Match a call to sqrt mathlib function. + auto *Call = dyn_cast(&I); + if (!Call) + return false; + + Module *M = Call->getModule(); + LibFunc Func; + if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func)) + return false; + + if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl) + return false; + + // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created + // (because NNAN or the operand arg must not be less than -0.0) and (2) we + // would not end up lowering to a libcall anyway (which could change the value + // of errno), then: + // (1) errno won't be set. + // (2) it is safe to convert this to an intrinsic call. + Type *Ty = Call->getType(); + Value *Arg = Call->getArgOperand(0); + if (TTI.haveFastSqrt(Ty) && + (Call->hasNoNaNs() || + cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I, + &DT))) { + IRBuilder<> Builder(&I); + IRBuilderBase::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(Call->getFastMathFlags()); + + Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty); + Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt"); + I.replaceAllUsesWith(NewSqrt); + + // Explicitly erase the old call because a call with side effects is not + // trivially dead. + I.eraseFromParent(); + return true; + } + + return false; +} + // Check if this array of constants represents a cttz table. // Iterate over the elements from \p Table by trying to find/match all // the numbers from 0 to \p InputBits that should represent cttz results. @@ -869,159 +915,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { return true; } -/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids -/// pessimistic codegen that has to account for setting errno and can enable -/// vectorization. -static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI, - TargetLibraryInfo &TLI, AssumptionCache &AC, - DominatorTree &DT) { - Module *M = Call->getModule(); - - // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created - // (because NNAN or the operand arg must not be less than -0.0) and (2) we - // would not end up lowering to a libcall anyway (which could change the value - // of errno), then: - // (1) errno won't be set. - // (2) it is safe to convert this to an intrinsic call. - Type *Ty = Call->getType(); - Value *Arg = Call->getArgOperand(0); - if (TTI.haveFastSqrt(Ty) && - (Call->hasNoNaNs() || - cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call, - &DT))) { - IRBuilder<> Builder(Call); - IRBuilderBase::FastMathFlagGuard Guard(Builder); - Builder.setFastMathFlags(Call->getFastMathFlags()); - - Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty); - Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt"); - Call->replaceAllUsesWith(NewSqrt); - - // Explicitly erase the old call because a call with side effects is not - // trivially dead. - Call->eraseFromParent(); - return true; - } - - return false; -} - -/// Try to expand strcmp(P, "x") calls. 
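The foldSqrt logic above only fires when fast sqrt is available and either the call carries nnan or the argument provably cannot be ordered less than -0.0, so errno can never be set. A minimal C++ sketch of a call shape this analysis is meant to cover; whether a given compiler actually rewrites it depends on the target and fast-math settings, so treat it as an illustration rather than a guarantee.

// Illustrative sketch, not part of the patch. x*x + y*y cannot be ordered
// less than zero, so sqrt cannot fail and cannot set errno; calls of this
// shape are candidates for becoming the llvm.sqrt intrinsic, which keeps the
// loop vectorizable.
#include <cmath>

float norms(const float *x, const float *y, float *out, int n) {
  float last = 0.0f;
  for (int i = 0; i < n; ++i) {
    last = std::sqrt(x[i] * x[i] + y[i] * y[i]);
    out[i] = last;
  }
  return last;
}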
-static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) { - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); - - // Trivial cases are optimized during inst combine - if (Str1P == Str2P) - return false; - - StringRef Str1, Str2; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - bool HasStr2 = getConstantStringInfo(Str2P, Str2); - - Value *NonConstantP = nullptr; - StringRef ConstantStr; - - if (!HasStr1 && HasStr2 && Str2.size() == 1) { - NonConstantP = Str1P; - ConstantStr = Str2; - } else if (!HasStr2 && HasStr1 && Str1.size() == 1) { - NonConstantP = Str2P; - ConstantStr = Str1; - } else { - return false; - } - - // Check if strcmp result is only used in a comparison with zero - if (!isOnlyUsedInZeroComparison(CI)) - return false; - - // For strcmp(P, "x") do the following transformation: - // - // (before) - // dst = strcmp(P, "x") - // - // (after) - // v0 = P[0] - 'x' - // [if v0 == 0] - // v1 = P[1] - // dst = phi(v0, v1) - // - - IRBuilder<> B(CI->getParent()); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - - Type *RetType = CI->getType(); - - B.SetInsertPoint(CI); - BasicBlock *InitialBB = B.GetInsertBlock(); - Value *Str1FirstCharacterValue = - B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType); - Value *Str2FirstCharacterValue = - ConstantInt::get(RetType, static_cast(ConstantStr[0])); - Value *FirstCharacterSub = - B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue); - Value *IsFirstCharacterSubZero = - B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0)); - Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen( - IsFirstCharacterSubZero, CI, /*Unreachable*/ false, - /*BranchWeights*/ nullptr, &DTU); - - B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator); - B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero"); - BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock(); - Value *Str1SecondCharacterValue = B.CreateZExt( - B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64( - B.getInt8Ty(), NonConstantP, 1)), - RetType); - - B.SetInsertPoint(CI); - B.GetInsertBlock()->setName("strcmp_expand_sub_join"); - - PHINode *Result = B.CreatePHI(RetType, 2); - Result->addIncoming(FirstCharacterSub, InitialBB); - Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB); - - CI->replaceAllUsesWith(Result); - CI->eraseFromParent(); - - MadeCFGChange = true; - - return true; -} - -static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI, - TargetLibraryInfo &TLI, DominatorTree &DT, - AssumptionCache &AC, bool &MadeCFGChange) { - CallInst *CI = dyn_cast(&I); - if (!CI) - return false; - - LibFunc Func; - Module *M = I.getModule(); - if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func)) - return false; - - switch (Func) { - case LibFunc_sqrt: - case LibFunc_sqrtf: - case LibFunc_sqrtl: - return foldSqrt(CI, TTI, TLI, AC, DT); - case LibFunc_strcmp: - return expandStrcmp(CI, DT, MadeCFGChange); - default: - break; - } - - return false; -} - /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. 
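The expandStrcmp helper removed above implements, in IR, the transformation spelled out in its (before)/(after) comment. The same idea in plain C++, valid when the strcmp result is only compared against zero: check the first byte against the one-character constant, and only when it matches load the second byte (the comparison against the terminating NUL). This sketch restates the comment and is not taken from the patch.

// Illustrative sketch, not part of the patch.
static int strcmp_with_x_expanded(const unsigned char *p) {
  int v0 = p[0] - 'x';   // first character vs. the constant
  if (v0 != 0)
    return v0;           // differs already; the second byte is never loaded
  return p[1];           // first bytes equal: result is p[1] - '\0'
}

bool equals_x(const char *p) {
  // The guard in the removed code requires exactly this kind of use: the
  // result only ever feeds a comparison with zero.
  return strcmp_with_x_expanded(reinterpret_cast<const unsigned char *>(p)) == 0;
}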
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, - AssumptionCache &AC, bool &MadeCFGChange) { + AssumptionCache &AC) { bool MadeChange = false; for (BasicBlock &BB : F) { // Ignore unreachable basic blocks. @@ -1046,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, // NOTE: This function introduces erasing of the instruction `I`, so it // needs to be called at the end of this sequence, otherwise we may make // bugs. - MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange); + MadeChange |= foldSqrt(I, TTI, TLI, AC, DT); } } @@ -1062,12 +962,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, /// handled in the callers of this function. static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, DominatorTree &DT, - AliasAnalysis &AA, bool &ChangedCFG) { + AliasAnalysis &AA) { bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); TruncInstCombine TIC(AC, TLI, DL, DT); MadeChange |= TIC.run(F); - MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG); + MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC); return MadeChange; } @@ -1078,21 +978,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F, auto &DT = AM.getResult(F); auto &TTI = AM.getResult(F); auto &AA = AM.getResult(F); - - bool MadeCFGChange = false; - - if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) { + if (!runImpl(F, AC, TTI, TLI, DT, AA)) { // No changes, all analyses are preserved. return PreservedAnalyses::all(); } - // Mark all the analyses that instcombine updates as preserved. PreservedAnalyses PA; - - if (MadeCFGChange) - PA.preserve(); - else - PA.preserveSet(); - + PA.preserveSet(); return PA; } diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index d78ab1c1ea28..d0606c15f3d5 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -194,12 +194,49 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB, for (auto *DA : It->second) Visited.insert(DA->getParent()); + SmallPtrSet EscapingBBs; + for (auto *U : CB->users()) { + // The use from coroutine intrinsics are not a problem. + if (isa(U)) + continue; + + // Think all other usages may be an escaping candidate conservatively. + // + // Note that the major user of switch ABI coroutine (the C++) will store + // resume.fn, destroy.fn and the index to the coroutine frame immediately. + // So the parent of the coro.begin in C++ will be always escaping. + // Then we can't get any performance benefits for C++ by improving the + // precision of the method. + // + // The reason why we still judge it is we want to make LLVM Coroutine in + // switch ABIs to be self contained as much as possible instead of a + // by-product of C++20 Coroutines. + EscapingBBs.insert(cast(U)->getParent()); + } + + bool PotentiallyEscaped = false; + do { const auto *BB = Worklist.pop_back_val(); if (!Visited.insert(BB).second) continue; - if (TIs.count(BB)) - return true; + + // A Path insensitive marker to test whether the coro.begin escapes. + // It is intentional to make it path insensitive while it may not be + // precise since we don't want the process to be too slow. 
+ PotentiallyEscaped |= EscapingBBs.count(BB); + + if (TIs.count(BB)) { + if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped) + return true; + + // If the function ends with the exceptional terminator, the memory used + // by the coroutine frame can be released by stack unwinding + // automatically. So we can think the coro.begin doesn't escape if it + // exits the function by exceptional terminator. + + continue; + } // Conservatively say that there is potentially a path. if (!--Limit) @@ -236,36 +273,36 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const { // memory location storing that value and not the virtual register. SmallPtrSet Terminators; - // First gather all of the non-exceptional terminators for the function. + // First gather all of the terminators for the function. // Consider the final coro.suspend as the real terminator when the current // function is a coroutine. - for (BasicBlock &B : *F) { - auto *TI = B.getTerminator(); - if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() && - !isa(TI)) - Terminators.insert(&B); - } + for (BasicBlock &B : *F) { + auto *TI = B.getTerminator(); + + if (TI->getNumSuccessors() != 0 || isa(TI)) + continue; + + Terminators.insert(&B); + } // Filter out the coro.destroy that lie along exceptional paths. SmallPtrSet ReferencedCoroBegins; for (const auto &It : DestroyAddr) { - // If there is any coro.destroy dominates all of the terminators for the - // coro.begin, we could know the corresponding coro.begin wouldn't escape. - for (Instruction *DA : It.second) { - if (llvm::all_of(Terminators, [&](auto *TI) { - return DT.dominates(DA, TI->getTerminator()); - })) { - ReferencedCoroBegins.insert(It.first); - break; - } - } - - // Whether there is any paths from coro.begin to Terminators which not pass - // through any of the coro.destroys. + // If every terminators is dominated by coro.destroy, we could know the + // corresponding coro.begin wouldn't escape. + // + // Otherwise hasEscapePath would decide whether there is any paths from + // coro.begin to Terminators which not pass through any of the + // coro.destroys. // // hasEscapePath is relatively slow, so we avoid to run it as much as // possible. 
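To make the escape reasoning above concrete at the source level: the case CoroElide is after is a coroutine whose handle never leaves the calling function and whose destroy runs on every normal exit path, so coro.begin does not escape and the frame allocation can be elided. The generator type below is a minimal illustrative sketch, not code from LLVM or from this patch, and it assumes C++20.

// Illustrative sketch, not part of the patch.
#include <coroutine>
#include <cstdio>
#include <exception>

struct Gen {
  struct promise_type {
    int value = 0;
    Gen get_return_object() {
      return Gen{std::coroutine_handle<promise_type>::from_promise(*this)};
    }
    std::suspend_always initial_suspend() noexcept { return {}; }
    std::suspend_always final_suspend() noexcept { return {}; }
    std::suspend_always yield_value(int v) noexcept { value = v; return {}; }
    void return_void() noexcept {}
    void unhandled_exception() { std::terminate(); }
  };
  std::coroutine_handle<promise_type> h;
  ~Gen() { if (h) h.destroy(); }   // destroy on the normal exit path
};

Gen counter(int n) {
  for (int i = 0; i < n; ++i)
    co_yield i;
}

int main() {
  Gen g = counter(3);              // handle never escapes main
  while (!g.h.done()) {
    g.h.resume();
    if (!g.h.done())
      std::printf("%d\n", g.h.promise().value);
  }
}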
- if (!ReferencedCoroBegins.count(It.first) && + if (llvm::all_of(Terminators, + [&](auto *TI) { + return llvm::any_of(It.second, [&](auto *DA) { + return DT.dominates(DA, TI->getTerminator()); + }); + }) || !hasEscapePath(It.first, Terminators)) ReferencedCoroBegins.insert(It.first); } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index afd6e034f46d..767b7c7defbb 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -906,7 +906,7 @@ InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { bool IsCastOpRHS = (CastOp == RHS); - bool IsZExt = isa(CastOp); + bool IsZExt = isa(CastOp); Constant *C; if (IsTrueArm) { diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 3e3be536defc..597cec8e61c9 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1777,6 +1777,20 @@ void CHR::cloneScopeBlocks(CHRScope *Scope, BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); NewBlocks.push_back(NewBB); VMap[BB] = NewBB; + + // Unreachable predecessors will not be cloned and will not have an edge + // to the cloned block. As such, also remove them from any phi nodes. + // To avoid iterator invalidation, first collect the dead predecessors + // from the first phi node, and then perform the actual removal. + if (auto *FirstPN = dyn_cast(NewBB->begin())) { + SmallVector DeadPreds; + for (BasicBlock *Pred : FirstPN->blocks()) + if (!DT.isReachableFromEntry(Pred)) + DeadPreds.push_back(Pred); + for (PHINode &PN : make_early_inc_range(NewBB->phis())) + for (BasicBlock *Pred : DeadPreds) + PN.removeIncomingValue(Pred); + } } // Place the cloned blocks right after the original blocks (right before the diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 21f0b1a92293..75adcabc0d34 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -898,7 +898,9 @@ bool GCOVProfiler::emitProfileNotes( if (Line == Loc.getLine()) continue; Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope())) + MDNode *Scope = Loc.getScope(); + // TODO: Handle blocks from another file due to #line, #include, etc. 
+ if (isa(Scope) || SP != getDISubprogram(Scope)) continue; GCOVLines &Lines = Block.getFile(Filename); diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 15628d32280d..2b88dd08d88b 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -424,7 +424,7 @@ static Decomposition decompose(Value *V, return MergeResults(Op0, Op1, IsSigned); ConstantInt *CI; - if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI)))) { + if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) { auto Result = decompose(Op0, Preconditions, IsSigned, DL); Result.mul(CI->getSExtValue()); return Result; diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 68642a01b37c..00937e0d734a 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); -STATISTIC(NumStackMove, "Number of stack-move optimizations performed"); namespace { @@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI, return true; } - // If this is a load-store pair from a stack slot to a stack slot, we - // might be able to perform the stack-move optimization just as we do for - // memcpys from an alloca to an alloca. - if (auto *DestAlloca = dyn_cast(SI->getPointerOperand())) { - if (auto *SrcAlloca = dyn_cast(LI->getPointerOperand())) { - if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca, - DL.getTypeStoreSize(T), BAA)) { - // Avoid invalidating the iterator. - BBI = SI->getNextNonDebugInstruction()->getIterator(); - eraseInstruction(SI); - eraseInstruction(LI); - ++NumMemCpyInstr; - return true; - } - } - } - return false; } @@ -1426,217 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, return true; } -// Attempts to optimize the pattern whereby memory is copied from an alloca to -// another alloca, where the two allocas don't have conflicting mod/ref. If -// successful, the two allocas can be merged into one and the transfer can be -// deleted. This pattern is generated frequently in Rust, due to the ubiquity of -// move operations in that language. -// -// Once we determine that the optimization is safe to perform, we replace all -// uses of the destination alloca with the source alloca. We also "shrink wrap" -// the lifetime markers of the single merged alloca to before the first use -// and after the last use. Note that the "shrink wrapping" procedure is a safe -// transformation only because we restrict the scope of this optimization to -// allocas that aren't captured. -bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, - AllocaInst *DestAlloca, - AllocaInst *SrcAlloca, uint64_t Size, - BatchAAResults &BAA) { - LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n" - << *Store << "\n"); - - // Make sure the two allocas are in the same address space. - if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) { - LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n"); - return false; - } - - // 1. Check that copy is full. 
Calculate the static size of the allocas to be - // merged, bail out if we can't. - const DataLayout &DL = DestAlloca->getModule()->getDataLayout(); - std::optional SrcSize = SrcAlloca->getAllocationSize(DL); - if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) { - LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n"); - return false; - } - std::optional DestSize = DestAlloca->getAllocationSize(DL); - if (!DestSize || DestSize->isScalable() || - Size != DestSize->getFixedValue()) { - LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n"); - return false; - } - - // 2-1. Check that src and dest are static allocas, which are not affected by - // stacksave/stackrestore. - if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() || - SrcAlloca->getParent() != Load->getParent() || - SrcAlloca->getParent() != Store->getParent()) - return false; - - // 2-2. Check that src and dest are never captured, unescaped allocas. Also - // collect lifetime markers first/last users in order to shrink wrap the - // lifetimes, and instructions with noalias metadata to remove them. - - SmallVector LifetimeMarkers; - Instruction *FirstUser = nullptr, *LastUser = nullptr; - SmallSet NoAliasInstrs; - - // Recursively track the user and check whether modified alias exist. - auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool { - bool CanBeNull, CanBeFreed; - return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); - }; - - auto CaptureTrackingWithModRef = - [&](Instruction *AI, - function_ref ModRefCallback) -> bool { - SmallVector Worklist; - Worklist.push_back(AI); - unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking(); - Worklist.reserve(MaxUsesToExplore); - SmallSet Visited; - while (!Worklist.empty()) { - Instruction *I = Worklist.back(); - Worklist.pop_back(); - for (const Use &U : I->uses()) { - if (Visited.size() >= MaxUsesToExplore) { - LLVM_DEBUG( - dbgs() - << "Stack Move: Exceeded max uses to see ModRef, bailing\n"); - return false; - } - if (!Visited.insert(&U).second) - continue; - switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { - case UseCaptureKind::MAY_CAPTURE: - return false; - case UseCaptureKind::PASSTHROUGH: - // Instructions cannot have non-instruction users. - Worklist.push_back(cast(U.getUser())); - continue; - case UseCaptureKind::NO_CAPTURE: { - auto *UI = cast(U.getUser()); - if (DestAlloca->getParent() != UI->getParent()) - return false; - if (!FirstUser || UI->comesBefore(FirstUser)) - FirstUser = UI; - if (!LastUser || LastUser->comesBefore(UI)) - LastUser = UI; - if (UI->isLifetimeStartOrEnd()) { - // We note the locations of these intrinsic calls so that we can - // delete them later if the optimization succeeds, this is safe - // since both llvm.lifetime.start and llvm.lifetime.end intrinsics - // conceptually fill all the bytes of the alloca with an undefined - // value. - int64_t Size = cast(UI->getOperand(0))->getSExtValue(); - if (Size < 0 || Size == DestSize) { - LifetimeMarkers.push_back(UI); - continue; - } - } - if (UI->hasMetadata(LLVMContext::MD_noalias)) - NoAliasInstrs.insert(UI); - if (!ModRefCallback(UI)) - return false; - } - } - } - } - return true; - }; - - // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics, - // from the alloca to the Store. 
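For readers unfamiliar with the stack-move optimization being removed here, the pattern it targeted looks like the following C++ sketch: a full-size copy between two local allocas in the same block, neither of which escapes, where the source is never touched again after the copy, so the two allocas can share one stack slot. The sketch is illustrative only and not taken from the patch.

// Illustrative sketch, not part of the patch.
#include <cstring>

int stack_move_candidate(int n) {
  char src[64];
  for (int i = 0; i < 64; ++i)          // src is written in place...
    src[i] = static_cast<char>(i * n);
  char dst[64];
  std::memcpy(dst, src, sizeof dst);    // ...then copied wholesale to dst,
  int s = 0;                            // and never read or written again,
  for (char c : dst)                    // so dst and src can be merged.
    s += c;
  return s;
}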
- ModRefInfo DestModRef = ModRefInfo::NoModRef; - MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size)); - auto DestModRefCallback = [&](Instruction *UI) -> bool { - // We don't care about the store itself. - if (UI == Store) - return true; - ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc); - // FIXME: For multi-BB cases, we need to see reachability from it to - // store. - // Bailout if Dest may have any ModRef before Store. - if (UI->comesBefore(Store) && isModOrRefSet(Res)) - return false; - DestModRef |= BAA.getModRefInfo(UI, DestLoc); - - return true; - }; - - if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) - return false; - - // 3. Check that, from after the Load to the end of the BB, - // 3-1. if the dest has any Mod, src has no Ref, and - // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes. - MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size)); - - auto SrcModRefCallback = [&](Instruction *UI) -> bool { - // Any ModRef before Load doesn't matter, also Load and Store can be - // ignored. - if (UI->comesBefore(Load) || UI == Load || UI == Store) - return true; - ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc); - if ((isModSet(DestModRef) && isRefSet(Res)) || - (isRefSet(DestModRef) && isModSet(Res))) - return false; - - return true; - }; - - if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback)) - return false; - - // We can do the transformation. First, align the allocas appropriately. - SrcAlloca->setAlignment( - std::max(SrcAlloca->getAlign(), DestAlloca->getAlign())); - - // Merge the two allocas. - DestAlloca->replaceAllUsesWith(SrcAlloca); - eraseInstruction(DestAlloca); - - // Drop metadata on the source alloca. - SrcAlloca->dropUnknownNonDebugMetadata(); - - // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists. - if (!LifetimeMarkers.empty()) { - LLVMContext &C = SrcAlloca->getContext(); - IRBuilder<> Builder(C); - - ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size); - // Create a new lifetime start marker before the first user of src or alloca - // users. - Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator()); - Builder.CreateLifetimeStart(SrcAlloca, AllocaSize); - - // Create a new lifetime end marker after the last user of src or alloca - // users. - // FIXME: If the last user is the terminator for the bb, we can insert - // lifetime.end marker to the immidiate post-dominator, but currently do - // nothing. - if (!LastUser->isTerminator()) { - Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator()); - Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); - } - - // Remove all other lifetime markers. - for (Instruction *I : LifetimeMarkers) - eraseInstruction(I); - } - - // As this transformation can cause memory accesses that didn't previously - // alias to begin to alias one another, we remove !noalias metadata from any - // uses of either alloca. This is conservative, but more precision doesn't - // seem worthwhile right now. - for (Instruction *I : NoAliasInstrs) - I->setMetadata(LLVMContext::MD_noalias, nullptr); - - LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n"); - NumStackMove++; - return true; -} - /// Perform simplification of memcpy's. 
If we have memcpy A /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite /// B to be a memcpy from X to Z (or potentially a memmove, depending on @@ -1693,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( AnyClobber, MemoryLocation::getForSource(M), BAA); - // There are five possible optimizations we can do for memcpy: + // There are four possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundance for DSE. // b) call-memcpy xform for return slot optimization. // c) memcpy from freshly alloca'd space or space that has just started // its lifetime copies undefined data, and we can therefore eliminate // the memcpy in favor of the data that was already at the destination. // d) memcpy from a just-memset'd source can be turned into memset. - // e) elimination of memcpy via stack-move optimization. if (auto *MD = dyn_cast(SrcClobber)) { if (Instruction *MI = MD->getMemoryInst()) { if (auto *CopySize = dyn_cast(M->getLength())) { @@ -1719,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { } } if (auto *MDep = dyn_cast(MI)) - if (processMemCpyMemCpyDependence(M, MDep, BAA)) - return true; + return processMemCpyMemCpyDependence(M, MDep, BAA); if (auto *MDep = dyn_cast(MI)) { if (performMemCpyToMemSetOptzn(M, MDep, BAA)) { LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); @@ -1739,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { } } - // If the transfer is from a stack slot to a stack slot, then we may be able - // to perform the stack-move optimization. See the comments in - // performStackMoveOptzn() for more details. - auto *DestAlloca = dyn_cast(M->getDest()); - if (!DestAlloca) - return false; - auto *SrcAlloca = dyn_cast(M->getSource()); - if (!SrcAlloca) - return false; - ConstantInt *Len = dyn_cast(M->getLength()); - if (Len == nullptr) - return false; - if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(), - BAA)) { - // Avoid invalidating the iterator. - BBI = M->getNextNonDebugInstruction()->getIterator(); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - return false; } diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 4f1350e4ebb9..2031e70bee1d 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -675,6 +675,12 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) { if (CI->isByValArgument(I)) { copyLocalTempOfByValueOperandIntoArguments(CI, I); + // When eliminating a tail call, we modify the values of the arguments. + // Therefore, if the byval parameter has a readonly attribute, we have to + // remove it. It is safe because, from the perspective of a caller, the + // byval parameter is always treated as "readonly," even if the readonly + // attribute is removed. 
+ F.removeParamAttr(I, Attribute::ReadOnly); ArgumentPHIs[I]->addIncoming(F.getArg(I), BB); } else ArgumentPHIs[I]->addIncoming(CI->getArgOperand(I), BB); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 5b0951252c07..3ad97613fe7a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -227,9 +227,21 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr, return ConstantInt::get(RetTy, Result); } +static bool isOnlyUsedInComparisonWithZero(Value *V) { + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast(U)) + if (Constant *C = dyn_cast(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, const DataLayout &DL) { - if (!isOnlyUsedInZeroComparison(CI)) + if (!isOnlyUsedInComparisonWithZero(CI)) return false; if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL)) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index aa924823e554..bc8e0413b339 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3775,7 +3775,7 @@ void GNUELFDumper::printRelRelaReloc(const Relocation &R, if (!Fields[4].Str.empty()) { if (RelAddend < 0) { Addend = " - "; - RelAddend = std::abs(RelAddend); + RelAddend = -static_cast(RelAddend); } else { Addend = " + "; } From 8092e001bcd76c0b9fec2311f3a515aa60d2ed07 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 11 Sep 2023 15:44:52 +0200 Subject: [PATCH 3/3] Vendor import of llvm-project branch release/17.x llvmorg-17.0.0-rc4-10-g0176e8729ea4. 
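Before the next patch begins, one note on the llvm-readobj change at the end of the hunk above: taking std::abs of a most-negative signed addend is undefined behaviour, whereas negating after conversion to an unsigned type is well defined for every input. A minimal sketch (the choice of uint64_t here is mine, matching the printed width, not quoted from the patch):

// Illustrative sketch, not part of the patch.
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t addend = INT64_MIN;
  // |INT64_MIN| is not representable as int64_t, so std::abs(addend) would be
  // undefined. Unsigned negation wraps modulo 2^64 and yields the magnitude.
  uint64_t magnitude = -static_cast<uint64_t>(addend);
  std::printf("- 0x%" PRIx64 "\n", magnitude);   // prints - 0x8000000000000000
}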
--- clang/include/clang/AST/ExprConcepts.h | 14 +++-- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- clang/lib/Driver/Driver.cpp | 8 ++- clang/lib/Driver/ToolChains/AIX.cpp | 6 +++ clang/lib/Driver/ToolChains/Arch/X86.cpp | 8 +-- clang/lib/Driver/ToolChains/Arch/X86.h | 2 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +- clang/lib/Frontend/FrontendAction.cpp | 5 ++ clang/lib/Sema/SemaExprCXX.cpp | 25 ++++++--- clang/lib/Sema/SemaTemplateInstantiate.cpp | 17 +++++- .../Inclusions/Stdlib/StdSymbolMap.inc | 54 +++++++++++++++++++ compiler-rt/lib/builtins/aarch64/lse.S | 40 ++++++++++++-- .../sanitizer_common_interceptors_format.inc | 16 ++++-- libcxx/include/__config | 36 +++++++++---- llvm/include/llvm/Analysis/LazyValueInfo.h | 3 ++ llvm/lib/Analysis/LazyValueInfo.cpp | 9 ++++ llvm/lib/Analysis/ScalarEvolution.cpp | 2 +- .../lib/CodeGen/ComplexDeinterleavingPass.cpp | 12 ++++- .../Target/AArch64/AArch64ISelLowering.cpp | 50 ++++++++++++++--- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 2 + .../Transforms/Vectorize/LoopVectorize.cpp | 38 ++++++++++++- 21 files changed, 297 insertions(+), 54 deletions(-) diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h index d900e980852b..13d4568119eb 100644 --- a/clang/include/clang/AST/ExprConcepts.h +++ b/clang/include/clang/AST/ExprConcepts.h @@ -14,20 +14,21 @@ #ifndef LLVM_CLANG_AST_EXPRCONCEPTS_H #define LLVM_CLANG_AST_EXPRCONCEPTS_H -#include "clang/AST/ASTContext.h" #include "clang/AST/ASTConcept.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" -#include "clang/AST/DeclarationName.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TrailingObjects.h" -#include #include +#include namespace clang { class ASTStmtReader; @@ -467,6 +468,13 @@ class NestedRequirement : public Requirement { } }; +using EntityPrinter = llvm::function_ref; + +/// \brief create a Requirement::SubstitutionDiagnostic with only a +/// SubstitutedEntity and DiagLoc using Sema's allocator. +Requirement::SubstitutionDiagnostic * +createSubstDiagAt(Sema &S, SourceLocation Location, EntityPrinter Printer); + } // namespace concepts /// C++2a [expr.prim.req]: diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a3506df7d4e5..f09d1129b128 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2386,7 +2386,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // functions. If the current target's C++ ABI requires this and this is a // member function, set its alignment accordingly. 
if (getTarget().getCXXABI().areMemberFunctionsAligned()) { - if (F->getPointerAlignment(getDataLayout()) < 2 && isa(D)) + if (isa(D) && F->getPointerAlignment(getDataLayout()) < 2) F->setAlignment(std::max(llvm::Align(2), F->getAlign().valueOrOne())); } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f6ea4d0b4366..bdbdad9362e1 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4936,6 +4936,12 @@ void Driver::BuildJobs(Compilation &C) const { (void)C.getArgs().hasArg(options::OPT_driver_mode); (void)C.getArgs().hasArg(options::OPT_rsp_quoting); + bool HasAssembleJob = llvm::any_of(C.getJobs(), [](auto &J) { + // Match ClangAs and other derived assemblers of Tool. ClangAs uses a + // longer ShortName "clang integrated assembler" while other assemblers just + // use "assembler". + return strstr(J.getCreator().getShortName(), "assembler"); + }); for (Arg *A : C.getArgs()) { // FIXME: It would be nice to be able to send the argument to the // DiagnosticsEngine, so that extra values, position, and so on could be @@ -4965,7 +4971,7 @@ void Driver::BuildJobs(Compilation &C) const { // already been warned about. if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN)) { if (A->getOption().hasFlag(options::TargetSpecific) && - !A->isIgnoredTargetSpecific()) { + !A->isIgnoredTargetSpecific() && !HasAssembleJob) { Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << getTargetTriple(); } else { diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 97217eba9ca0..bfc86d9f3471 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -30,6 +30,7 @@ void aix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { + const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; const bool IsArch32Bit = getToolChain().getTriple().isArch32Bit(); @@ -38,6 +39,11 @@ void aix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, if (!IsArch32Bit && !IsArch64Bit) llvm_unreachable("Unsupported bit width value."); + if (Arg *A = C.getArgs().getLastArg(options::OPT_G)) { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getSpelling() << D.getTargetTriple(); + } + // Specify the mode in which the as(1) command operates. if (IsArch32Bit) { CmdArgs.push_back("-a32"); diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 4383b8004143..cf2bc63d74ad 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -118,13 +118,7 @@ std::string x86::getX86TargetCPU(const Driver &D, const ArgList &Args, void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, - std::vector &Features, bool ForAS) { - if (ForAS) { - // Some target-specific options are only handled in AddX86TargetArgs, which - // is not called by ClangAs::ConstructJob. Claim them here. - Args.claimAllArgs(options::OPT_mfpmath_EQ); - } - + std::vector &Features) { // Claim and report unsupported -mabi=. Note: we don't support "sysv_abi" or // "ms_abi" as default function attributes. 
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mabi_EQ)) { diff --git a/clang/lib/Driver/ToolChains/Arch/X86.h b/clang/lib/Driver/ToolChains/Arch/X86.h index 762a1fa6f4d5..e07387f3ece3 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.h +++ b/clang/lib/Driver/ToolChains/Arch/X86.h @@ -26,7 +26,7 @@ std::string getX86TargetCPU(const Driver &D, const llvm::opt::ArgList &Args, void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args, - std::vector &Features, bool ForAS); + std::vector &Features); } // end namespace x86 } // end namespace target diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 8766d34eec53..0d6907b8e5c7 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -528,7 +528,7 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, break; case llvm::Triple::x86: case llvm::Triple::x86_64: - x86::getX86TargetFeatures(D, Triple, Args, Features, ForAS); + x86::getX86TargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::hexagon: hexagon::getHexagonTargetFeatures(D, Triple, Args, Features); diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index c6f958a6077b..0bd4b01ff79d 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/FileEntry.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/Sarif.h" +#include "clang/Basic/Stack.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -1150,6 +1151,10 @@ void ASTFrontendAction::ExecuteAction() { CompilerInstance &CI = getCompilerInstance(); if (!CI.hasPreprocessor()) return; + // This is a fallback: If the client forgets to invoke this, we mark the + // current stack as the bottom. Though not optimal, this could help prevent + // stack overflow during deep recursion. + clang::noteBottomOfStack(); // FIXME: Move the truncation aspect of this into Sema, we delayed this till // here so the source manager would be initialized. 
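The FrontendAction change above calls clang::noteBottomOfStack() as a fallback so later stack-depth checks have a reference point. The general technique, sketched below independently of clang's actual implementation, is to record the address of a local variable at a known entry point and compare it against the current frame to estimate consumption. The helper names and thresholds here are illustrative, not clang's.

// Illustrative sketch, not part of the patch.
#include <cstddef>
#include <cstdint>
#include <cstdio>

static thread_local uintptr_t BottomOfStack = 0;

static void noteBottomOfStackHere() {
  char marker;
  BottomOfStack = reinterpret_cast<uintptr_t>(&marker);
}

static std::size_t estimatedStackUsage() {
  char marker;
  uintptr_t here = reinterpret_cast<uintptr_t>(&marker);
  return here > BottomOfStack ? here - BottomOfStack : BottomOfStack - here;
}

static int deepRecursion(int depth) {
  char pad[1024];                       // make each frame visibly large
  pad[0] = static_cast<char>(depth);
  if (estimatedStackUsage() > 256 * 1024) {
    std::printf("bailing out at depth %d\n", depth);   // degrade gracefully
    return depth;
  }
  return deepRecursion(depth + 1) + pad[0] % 2;
}

int main() {
  noteBottomOfStackHere();
  return deepRecursion(0) > 0 ? 0 : 1;
}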
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 423d5372a6f6..1cff4a75790e 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -19,6 +19,7 @@ #include "clang/AST/CharUnits.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" @@ -9072,16 +9073,24 @@ Sema::BuildExprRequirement( MultiLevelTemplateArgumentList MLTAL(Param, TAL.asArray(), /*Final=*/false); MLTAL.addOuterRetainedLevels(TPL->getDepth()); - Expr *IDC = Param->getTypeConstraint()->getImmediatelyDeclaredConstraint(); + const TypeConstraint *TC = Param->getTypeConstraint(); + assert(TC && "Type Constraint cannot be null here"); + auto *IDC = TC->getImmediatelyDeclaredConstraint(); + assert(IDC && "ImmediatelyDeclaredConstraint can't be null here."); ExprResult Constraint = SubstExpr(IDC, MLTAL); if (Constraint.isInvalid()) { - Status = concepts::ExprRequirement::SS_ExprSubstitutionFailure; - } else { - SubstitutedConstraintExpr = - cast(Constraint.get()); - if (!SubstitutedConstraintExpr->isSatisfied()) - Status = concepts::ExprRequirement::SS_ConstraintsNotSatisfied; - } + return new (Context) concepts::ExprRequirement( + concepts::createSubstDiagAt(*this, IDC->getExprLoc(), + [&](llvm::raw_ostream &OS) { + IDC->printPretty(OS, /*Helper=*/nullptr, + getPrintingPolicy()); + }), + IsSimple, NoexceptLoc, ReturnTypeRequirement); + } + SubstitutedConstraintExpr = + cast(Constraint.get()); + if (!SubstitutedConstraintExpr->isSatisfied()) + Status = concepts::ExprRequirement::SS_ConstraintsNotSatisfied; } return new (Context) concepts::ExprRequirement(E, IsSimple, NoexceptLoc, ReturnTypeRequirement, Status, diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 8702e2ca3a1b..394006a57747 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2276,9 +2276,9 @@ QualType TemplateInstantiator::TransformSubstTemplateTypeParmPackType( getPackIndex(Pack), Arg, TL.getNameLoc()); } -template static concepts::Requirement::SubstitutionDiagnostic * -createSubstDiag(Sema &S, TemplateDeductionInfo &Info, EntityPrinter Printer) { +createSubstDiag(Sema &S, TemplateDeductionInfo &Info, + concepts::EntityPrinter Printer) { SmallString<128> Message; SourceLocation ErrorLoc; if (Info.hasSFINAEDiagnostic()) { @@ -2302,6 +2302,19 @@ createSubstDiag(Sema &S, TemplateDeductionInfo &Info, EntityPrinter Printer) { StringRef(MessageBuf, Message.size())}; } +concepts::Requirement::SubstitutionDiagnostic * +concepts::createSubstDiagAt(Sema &S, SourceLocation Location, + EntityPrinter Printer) { + SmallString<128> Entity; + llvm::raw_svector_ostream OS(Entity); + Printer(OS); + char *EntityBuf = new (S.Context) char[Entity.size()]; + llvm::copy(Entity, EntityBuf); + return new (S.Context) concepts::Requirement::SubstitutionDiagnostic{ + /*SubstitutedEntity=*/StringRef(EntityBuf, Entity.size()), + /*DiagLoc=*/Location, /*DiagMessage=*/StringRef()}; +} + ExprResult TemplateInstantiator::TransformRequiresTypeParams( SourceLocation KWLoc, SourceLocation RBraceLoc, const RequiresExpr *RE, RequiresExprBodyDecl *Body, ArrayRef Params, diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc b/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc index a08ec11e77a4..b46bd2e4d7a4 100644 --- a/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc +++ 
b/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc @@ -3773,6 +3773,33 @@ SYMBOL(viewable_range, std::ranges::, ) SYMBOL(wistream_view, std::ranges::, ) SYMBOL(zip_transform_view, std::ranges::, ) SYMBOL(zip_view, std::ranges::, ) +SYMBOL(all, std::ranges::views::, ) +SYMBOL(all_t, std::ranges::views::, ) +SYMBOL(as_const, std::ranges::views::, ) +SYMBOL(as_rvalue, std::ranges::views::, ) +SYMBOL(common, std::ranges::views::, ) +SYMBOL(counted, std::ranges::views::, ) +SYMBOL(drop, std::ranges::views::, ) +SYMBOL(drop_while, std::ranges::views::, ) +SYMBOL(elements, std::ranges::views::, ) +SYMBOL(empty, std::ranges::views::, ) +SYMBOL(filter, std::ranges::views::, ) +SYMBOL(iota, std::ranges::views::, ) +SYMBOL(istream, std::ranges::views::, ) +SYMBOL(istream, std::ranges::views::, ) +SYMBOL(join, std::ranges::views::, ) +SYMBOL(join_with, std::ranges::views::, ) +SYMBOL(keys, std::ranges::views::, ) +SYMBOL(lazy_split, std::ranges::views::, ) +SYMBOL(reverse, std::ranges::views::, ) +SYMBOL(single, std::ranges::views::, ) +SYMBOL(split, std::ranges::views::, ) +SYMBOL(take, std::ranges::views::, ) +SYMBOL(take_while, std::ranges::views::, ) +SYMBOL(transform, std::ranges::views::, ) +SYMBOL(values, std::ranges::views::, ) +SYMBOL(zip, std::ranges::views::, ) +SYMBOL(zip_transform, std::ranges::views::, ) SYMBOL(ECMAScript, std::regex_constants::, ) SYMBOL(awk, std::regex_constants::, ) SYMBOL(basic, std::regex_constants::, ) @@ -3817,3 +3844,30 @@ SYMBOL(get_id, std::this_thread::, ) SYMBOL(sleep_for, std::this_thread::, ) SYMBOL(sleep_until, std::this_thread::, ) SYMBOL(yield, std::this_thread::, ) +SYMBOL(all, std::views::, ) +SYMBOL(all_t, std::views::, ) +SYMBOL(as_const, std::views::, ) +SYMBOL(as_rvalue, std::views::, ) +SYMBOL(common, std::views::, ) +SYMBOL(counted, std::views::, ) +SYMBOL(drop, std::views::, ) +SYMBOL(drop_while, std::views::, ) +SYMBOL(elements, std::views::, ) +SYMBOL(empty, std::views::, ) +SYMBOL(filter, std::views::, ) +SYMBOL(iota, std::views::, ) +SYMBOL(istream, std::views::, ) +SYMBOL(istream, std::views::, ) +SYMBOL(join, std::views::, ) +SYMBOL(join_with, std::views::, ) +SYMBOL(keys, std::views::, ) +SYMBOL(lazy_split, std::views::, ) +SYMBOL(reverse, std::views::, ) +SYMBOL(single, std::views::, ) +SYMBOL(split, std::views::, ) +SYMBOL(take, std::views::, ) +SYMBOL(take_while, std::views::, ) +SYMBOL(transform, std::views::, ) +SYMBOL(values, std::views::, ) +SYMBOL(zip, std::views::, ) +SYMBOL(zip_transform, std::views::, ) diff --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S index 5dc0d5320b5a..1fe18f4a4681 100644 --- a/compiler-rt/lib/builtins/aarch64/lse.S +++ b/compiler-rt/lib/builtins/aarch64/lse.S @@ -7,7 +7,7 @@ // Out-of-line LSE atomics helpers. Ported from libgcc library. 
// N = {1, 2, 4, 8} // M = {1, 2, 4, 8, 16} -// ORDER = {'relax', 'acq', 'rel', 'acq_rel'} +// ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'} // Routines implemented: // // iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr) @@ -35,8 +35,8 @@ HIDDEN(___aarch64_have_lse_atomics) #endif // Generate mnemonics for -// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4 -// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4 +// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5 +// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4,5 #if SIZE == 1 #define S b @@ -64,24 +64,44 @@ HIDDEN(___aarch64_have_lse_atomics) #define L #define M 0x000000 #define N 0x000000 +#define BARRIER #elif MODEL == 2 #define SUFF _acq #define A a #define L #define M 0x400000 #define N 0x800000 +#define BARRIER #elif MODEL == 3 #define SUFF _rel #define A #define L l #define M 0x008000 #define N 0x400000 +#define BARRIER #elif MODEL == 4 #define SUFF _acq_rel #define A a #define L l #define M 0x408000 #define N 0xc00000 +#define BARRIER +#elif MODEL == 5 +#define SUFF _sync +#ifdef L_swp +// swp has _acq semantics. +#define A a +#define L +#define M 0x400000 +#define N 0x800000 +#else +// All other _sync functions have _seq semantics. +#define A a +#define L l +#define M 0x408000 +#define N 0xc00000 +#endif +#define BARRIER dmb ish #else #error #endif // MODEL @@ -96,7 +116,12 @@ HIDDEN(___aarch64_have_lse_atomics) #endif #define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF) +#if MODEL == 5 +// Drop A for _sync functions. +#define LDXR GLUE3(ld, xr, S) +#else #define LDXR GLUE4(ld, A, xr, S) +#endif #define STXR GLUE4(st, L, xr, S) // Define temporary registers. @@ -136,9 +161,15 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas)) STXR w(tmp1), s(1), [x2] cbnz w(tmp1), 0b 1: + BARRIER ret #else +#if MODEL == 5 +// Drop A for _sync functions. +#define LDXP GLUE2(ld, xp) +#else #define LDXP GLUE3(ld, A, xp) +#endif #define STXP GLUE3(st, L, xp) #ifdef HAS_ASM_LSE #define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4] @@ -159,6 +190,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas)) STXP w(tmp2), x2, x3, [x4] cbnz w(tmp2), 0b 1: + BARRIER ret #endif END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas)) @@ -180,6 +212,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp)) LDXR s(0), [x1] STXR w(tmp1), s(tmp0), [x1] cbnz w(tmp1), 0b + BARRIER ret END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp)) #endif // L_swp @@ -224,6 +257,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM)) OP s(tmp1), s(0), s(tmp0) STXR w(tmp2), s(tmp1), [x1] cbnz w(tmp2), 0b + BARRIER ret END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM)) #endif // L_ldadd L_ldclr L_ldeor L_ldset diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc index 220abb89c3be..24485900644b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc @@ -340,11 +340,19 @@ static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, size = 0; } COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); - // For %ms/%mc, write the allocated output buffer as well. + // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well. 
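On the lse.S hunk above: the new _sync helper variants end in a dmb ish because they serve the legacy __sync_* builtins, which promise full-barrier semantics, while the __atomic_* builtins carry an explicit memory order and use the relax/acq/rel/acq_rel variants. The mapping to specific outline helpers below is descriptive rather than verified against the driver; a minimal sketch assuming an AArch64 build with -moutline-atomics:

// Illustrative sketch, not part of the patch.
#include <cstdint>

uint64_t legacy_fetch_add(uint64_t *p, uint64_t v) {
  return __sync_fetch_and_add(p, v);                   // full-barrier semantics
}

uint64_t relaxed_fetch_add(uint64_t *p, uint64_t v) {
  return __atomic_fetch_add(p, v, __ATOMIC_RELAXED);   // no ordering required
}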
if (dir.allocate) { - char *buf = *(char **)argp; - if (buf) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1); + if (char *buf = *(char **)argp) { + if (dir.convSpecifier == 'c') + size = 1; + else if (dir.convSpecifier == 'C') + size = sizeof(wchar_t); + else if (dir.convSpecifier == 'S') + size = (internal_wcslen((wchar_t *)buf) + 1) * sizeof(wchar_t); + else // 's' or '[' + size = internal_strlen(buf) + 1; + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size); + } } } } diff --git a/libcxx/include/__config b/libcxx/include/__config index 9759d3b9e8e0..43f8a20031ff 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -208,19 +208,16 @@ // HARDENING { -// TODO(hardening): remove this in LLVM 18. -// This is for backward compatibility -- make enabling `_LIBCPP_ENABLE_ASSERTIONS` (which predates hardening modes) -// equivalent to setting the hardened mode. -# ifdef _LIBCPP_ENABLE_ASSERTIONS -# warning "_LIBCPP_ENABLE_ASSERTIONS is deprecated, please use _LIBCPP_ENABLE_HARDENED_MODE instead." -# if _LIBCPP_ENABLE_ASSERTIONS != 0 && _LIBCPP_ENABLE_ASSERTIONS != 1 -# error "_LIBCPP_ENABLE_ASSERTIONS must be set to 0 or 1" -# endif -# if _LIBCPP_ENABLE_ASSERTIONS -# define _LIBCPP_ENABLE_HARDENED_MODE 1 -# endif +# ifndef _LIBCPP_ENABLE_ASSERTIONS +# define _LIBCPP_ENABLE_ASSERTIONS _LIBCPP_ENABLE_ASSERTIONS_DEFAULT +# endif +# if _LIBCPP_ENABLE_ASSERTIONS != 0 && _LIBCPP_ENABLE_ASSERTIONS != 1 +# error "_LIBCPP_ENABLE_ASSERTIONS must be set to 0 or 1" # endif +// NOTE: These modes are experimental and are not stable yet in LLVM 17. Please refrain from using them and use the +// documented libc++ "safe" mode instead. +// // Enables the hardened mode which consists of all checks intended to be used in production. Hardened mode prioritizes // security-critical checks that can be done with relatively little overhead in constant time. Mutually exclusive with // `_LIBCPP_ENABLE_DEBUG_MODE`. @@ -275,6 +272,11 @@ # error "Only one of _LIBCPP_ENABLE_HARDENED_MODE and _LIBCPP_ENABLE_DEBUG_MODE can be enabled." # endif +# if _LIBCPP_ENABLE_ASSERTIONS && (_LIBCPP_ENABLE_HARDENED_MODE || _LIBCPP_ENABLE_DEBUG_MODE) +# error \ + "_LIBCPP_ENABLE_ASSERTIONS is mutually exclusive with _LIBCPP_ENABLE_HARDENED_MODE and _LIBCPP_ENABLE_DEBUG_MODE." +# endif + // Hardened mode checks. // clang-format off @@ -303,6 +305,18 @@ # define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) +// Safe mode checks. + +# elif _LIBCPP_ENABLE_ASSERTIONS + +// All checks enabled. +# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) + // Disable all checks if hardening is not enabled. 
# else diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index b109b7f7e65a..7b2bfdac75a8 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -115,6 +115,9 @@ class LazyValueInfo { /// PredBB to OldSucc to be from PredBB to NewSucc instead. void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc); + /// Remove information related to this value from the cache. + void forgetValue(Value *V); + /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 33651783cb17..2ba6036056d9 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -465,6 +465,10 @@ class LazyValueInfoImpl { F.print(OS, &Writer); } + /// This is part of the update interface to remove information related to this + /// value from the cache. + void forgetValue(Value *V) { TheCache.eraseValue(V); } + /// This is part of the update interface to inform the cache /// that a block has been deleted. void eraseBlock(BasicBlock *BB) { @@ -1969,6 +1973,11 @@ void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, } } +void LazyValueInfo::forgetValue(Value *V) { + if (PImpl) + getImpl(PImpl, AC, nullptr).forgetValue(V); +} + void LazyValueInfo::eraseBlock(BasicBlock *BB) { if (PImpl) { getImpl(PImpl, AC, BB->getModule()).eraseBlock(BB); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 111d4d30aab9..39ab48b4a48e 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6833,7 +6833,7 @@ const ConstantRange &ScalarEvolution::getRangeRef( if (llvm::isKnownNonZero(V, DL)) MinVal = Align; ConservativeResult = ConservativeResult.intersectWith( - {MinVal, MaxVal + 1}, RangeType); + ConstantRange::getNonEmpty(MinVal, MaxVal + 1), RangeType); } } diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index 952f454f8f6a..7979ac9a5fb7 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -1424,7 +1424,17 @@ bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) { // CompositeNode we should choose only one either Real or Imag instruction to // use as an anchor for generating complex instruction. auto It = RootToNode.find(RootI); - if (It != RootToNode.end() && It->second->Real == RootI) { + if (It != RootToNode.end()) { + auto RootNode = It->second; + assert(RootNode->Operation == + ComplexDeinterleavingOperation::ReductionOperation); + // Find out which part, Real or Imag, comes later, and only if we come to + // the latest part, add it to OrderedRoots. + auto *R = cast(RootNode->Real); + auto *I = cast(RootNode->Imag); + auto *ReplacementAnchor = R->comesBefore(I) ? 
+    if (ReplacementAnchor != RootI)
+      return false;
     OrderedRoots.push_back(RootI);
     return true;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0605dfa63793..c7a6dd7deb45 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13840,7 +13840,17 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_neon_ld4:
   case Intrinsic::aarch64_neon_ld1x2:
   case Intrinsic::aarch64_neon_ld1x3:
-  case Intrinsic::aarch64_neon_ld1x4:
+  case Intrinsic::aarch64_neon_ld1x4: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
+    Info.offset = 0;
+    Info.align.reset();
+    // volatile loads with NEON intrinsics not supported
+    Info.flags = MachineMemOperand::MOLoad;
+    return true;
+  }
   case Intrinsic::aarch64_neon_ld2lane:
   case Intrinsic::aarch64_neon_ld3lane:
   case Intrinsic::aarch64_neon_ld4lane:
@@ -13848,9 +13858,13 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_neon_ld3r:
   case Intrinsic::aarch64_neon_ld4r: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    // Conservatively set memVT to the entire set of vectors loaded.
-    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
-    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    // ldX intrinsics return a struct whose fields all have the same vector type.
+    Type *RetTy = I.getType();
+    auto *StructTy = cast<StructType>(RetTy);
+    unsigned NumElts = StructTy->getNumElements();
+    Type *VecTy = StructTy->getElementType(0);
+    MVT EleVT = MVT::getVT(VecTy).getVectorElementType();
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
     Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
     Info.offset = 0;
     Info.align.reset();
@@ -13863,20 +13877,40 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_neon_st4:
   case Intrinsic::aarch64_neon_st1x2:
   case Intrinsic::aarch64_neon_st1x3:
-  case Intrinsic::aarch64_neon_st1x4:
+  case Intrinsic::aarch64_neon_st1x4: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    unsigned NumElts = 0;
+    for (const Value *Arg : I.args()) {
+      Type *ArgTy = Arg->getType();
+      if (!ArgTy->isVectorTy())
+        break;
+      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
+    }
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
+    Info.offset = 0;
+    Info.align.reset();
+    // volatile stores with NEON intrinsics not supported
+    Info.flags = MachineMemOperand::MOStore;
+    return true;
+  }
   case Intrinsic::aarch64_neon_st2lane:
   case Intrinsic::aarch64_neon_st3lane:
   case Intrinsic::aarch64_neon_st4lane: {
     Info.opc = ISD::INTRINSIC_VOID;
-    // Conservatively set memVT to the entire set of vectors stored.
     unsigned NumElts = 0;
+    // All vector arguments have the same type.
+    Type *VecTy = I.getArgOperand(0)->getType();
+    MVT EleVT = MVT::getVT(VecTy).getVectorElementType();
+
     for (const Value *Arg : I.args()) {
       Type *ArgTy = Arg->getType();
       if (!ArgTy->isVectorTy())
         break;
-      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
+      NumElts += 1;
     }
-    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts);
     Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
     Info.offset = 0;
     Info.align.reset();
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 24390f1b54f6..5b8f1b00dc03 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1269,6 +1269,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
     if (IsLoadCSE) {
       LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
       combineMetadataForCSE(NLoadI, LoadI, false);
+      LVI->forgetValue(NLoadI);
     };
 
   // If the returned value is the load itself, replace with poison. This can
@@ -1461,6 +1462,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
 
   for (LoadInst *PredLoadI : CSELoads) {
     combineMetadataForCSE(PredLoadI, LoadI, true);
+    LVI->forgetValue(PredLoadI);
   }
 
   LoadI->replaceAllUsesWith(PN);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d7e40e8ef978..b603bbe55dc9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3781,10 +3781,44 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
   // the incoming edges.
   VPBasicBlock *Header =
       State.Plan->getVectorLoopRegion()->getEntryBasicBlock();
+
+  // Gather all VPReductionPHIRecipes and sort them so that intermediate stores
+  // sunk outside of the loop keep the same order as they had in the original
+  // loop.
+  SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
   for (VPRecipeBase &R : Header->phis()) {
     if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
-      fixReduction(ReductionPhi, State);
-    else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
+      ReductionPHIList.emplace_back(ReductionPhi);
+  }
+  stable_sort(ReductionPHIList, [this](const VPReductionPHIRecipe *R1,
                                        const VPReductionPHIRecipe *R2) {
+    auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
+    auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
+
+    // If neither of the recipes has an intermediate store, keep the order the
+    // same.
+    if (!IS1 && !IS2)
+      return false;
+
+    // If only one of the recipes has an intermediate store, then move it
+    // towards the beginning of the list.
+    if (IS1 && !IS2)
+      return true;
+
+    if (!IS1 && IS2)
+      return false;
+
+    // If both recipes have an intermediate store, then the recipe with the
+    // later store should be processed earlier. So it should go to the
+    // beginning of the list.
+    return DT->dominates(IS2, IS1);
+  });
+
+  for (VPReductionPHIRecipe *ReductionPhi : ReductionPHIList)
+    fixReduction(ReductionPhi, State);
+
+  for (VPRecipeBase &R : Header->phis()) {
+    if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
       fixFixedOrderRecurrence(FOR, State);
   }
 }
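
Note (appended for reference, not part of the diff above): the stable_sort added to InnerLoopVectorizer::fixCrossIterationPHIs only encodes an ordering rule for reduction PHIs with intermediate stores. The following is a minimal, hypothetical C++ sketch of that rule in isolation; the names are invented for illustration, and a plain integer "store position" stands in for the DominatorTree query DT->dominates(IS2, IS1).

// Hypothetical illustration only -- not LLVM code. An int position models
// where the intermediate store sits; a larger position means a later store.
#include <algorithm>
#include <cstdio>
#include <optional>
#include <vector>

struct Reduction {
  const char *Name;
  std::optional<int> StorePos; // empty: no intermediate store
};

int main() {
  std::vector<Reduction> Phis = {{"sum", std::nullopt}, {"r1", 10}, {"r2", 20}};

  std::stable_sort(Phis.begin(), Phis.end(),
                   [](const Reduction &R1, const Reduction &R2) {
                     // Neither has an intermediate store: keep the original order.
                     if (!R1.StorePos && !R2.StorePos)
                       return false;
                     // Only one has a store: move it towards the beginning.
                     if (R1.StorePos && !R2.StorePos)
                       return true;
                     if (!R1.StorePos && R2.StorePos)
                       return false;
                     // Both have stores: the later store is processed earlier,
                     // mirroring DT->dominates(IS2, IS1) in the patch.
                     return *R2.StorePos < *R1.StorePos;
                   });

  for (const Reduction &R : Phis)
    std::printf("%s\n", R.Name); // prints: r2, r1, sum
  return 0;
}

Compiled standalone, this prints r2, r1, sum: the reduction whose (hypothetical) intermediate store comes last is fixed first, and reductions without stores keep their relative order.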