diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6501a4870e2a6e..27ff9ddc70a34e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -110,6 +110,13 @@ Attribute Changes in Clang attribute is handled instead, e.g. in ``handleDeclAttribute``. (This was changed in order to better support attributes in code completion). +- ``__has_cpp_attribute``, ``__has_c_attribute``, ``__has_attribute``, and + ``__has_declspec_attribute`` will now macro expand their argument. This causes + a change in behavior for code using ``__has_cpp_attribute(__clang__::attr)`` + (and same for ``__has_c_attribute``) where it would previously expand to ``0`` + for all attributes, but will now issue an error due to the expansion of the + predefined ``__clang__`` macro. + Windows Support --------------- @@ -122,6 +129,9 @@ Windows Support C Language Changes in Clang --------------------------- +- The value of ``__STDC_VERSION__`` has been bumped to ``202000L`` when passing + ``-std=c2x`` so that it can be distinguished from C17 mode. This value is + expected to change again when C23 is published. - Wide multi-characters literals such as ``L'ab'`` that would previously be interpreted as ``L'b'`` are now ill-formed in all language modes. The motivation for this change is outlined in `P2362 `_. diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index a28348a7af169b..6cb6d0b2d1fcc4 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -212,7 +212,7 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) { return "lp64d"; return "lp64"; } - llvm_unreachable(); + llvm_unreachable("unhandled XLen"); } // 3. 
Choose a default based on the triple diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index aa94b130cb1240..a3e1ca5d5226c7 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -371,7 +371,10 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, // value is, are implementation-defined. // (Removed in C++20.) if (!LangOpts.CPlusPlus) { - if (LangOpts.C17) + // FIXME: Use correct value for C23. + if (LangOpts.C2x) + Builder.defineMacro("__STDC_VERSION__", "202000L"); + else if (LangOpts.C17) Builder.defineMacro("__STDC_VERSION__", "201710L"); else if (LangOpts.C11) Builder.defineMacro("__STDC_VERSION__", "201112L"); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index bf19f538647e6c..5a0fa5184e38bf 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1293,7 +1293,7 @@ static bool EvaluateHasIncludeNext(Token &Tok, /// integer values. static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS, Token &Tok, IdentifierInfo *II, - Preprocessor &PP, + Preprocessor &PP, bool ExpandArgs, llvm::function_ref< int(Token &Tok, bool &HasLexedNextTok)> Op) { @@ -1319,7 +1319,10 @@ static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS, bool SuppressDiagnostic = false; while (true) { // Parse next token. 
- PP.LexUnexpandedToken(Tok); + if (ExpandArgs) + PP.Lex(Tok); + else + PP.LexUnexpandedToken(Tok); already_lexed: switch (Tok.getKind()) { @@ -1609,21 +1612,21 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { OS << CounterValue++; Tok.setKind(tok::numeric_constant); } else if (II == Ident__has_feature) { - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_feature_check_malformed); return II && HasFeature(*this, II->getName()); }); } else if (II == Ident__has_extension) { - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_feature_check_malformed); return II && HasExtension(*this, II->getName()); }); } else if (II == Ident__has_builtin) { - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_feature_check_malformed); @@ -1675,12 +1678,12 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { } }); } else if (II == Ident__is_identifier) { - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, [](Token &Tok, bool &HasLexedNextToken) -> int { return Tok.is(tok::identifier); }); } else if (II == Ident__has_attribute) { - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true, [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_feature_check_malformed); @@ -1688,7 +1691,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { 
getTargetInfo(), getLangOpts()) : 0; }); } else if (II == Ident__has_declspec) { - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true, [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_feature_check_malformed); @@ -1704,8 +1707,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { } else if (II == Ident__has_cpp_attribute || II == Ident__has_c_attribute) { bool IsCXX = II == Ident__has_cpp_attribute; - EvaluateFeatureLikeBuiltinMacro( - OS, Tok, II, *this, [&](Token &Tok, bool &HasLexedNextToken) -> int { + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true, + [&](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *ScopeII = nullptr; IdentifierInfo *II = ExpectFeatureIdentifierInfo( Tok, *this, diag::err_feature_check_malformed); @@ -1719,7 +1722,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { HasLexedNextToken = true; else { ScopeII = II; - LexUnexpandedToken(Tok); + // Lex an expanded token for the attribute name. + Lex(Tok); II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_feature_check_malformed); } @@ -1746,7 +1750,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { Tok.setKind(tok::numeric_constant); } else if (II == Ident__has_warning) { // The argument should be a parenthesized string literal. - EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, [this](Token &Tok, bool &HasLexedNextToken) -> int { std::string WarningName; SourceLocation StrStartLoc = Tok.getLocation(); @@ -1777,7 +1781,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // The argument to this builtin should be an identifier. The // builtin evaluates to 1 when that identifier names the module we are // currently building. 
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, + EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this, diag::err_expected_id_building_module); @@ -1837,28 +1841,32 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { return; } else if (II == Ident__is_target_arch) { EvaluateFeatureLikeBuiltinMacro( - OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + OS, Tok, II, *this, false, + [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo( Tok, *this, diag::err_feature_check_malformed); return II && isTargetArch(getTargetInfo(), II); }); } else if (II == Ident__is_target_vendor) { EvaluateFeatureLikeBuiltinMacro( - OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + OS, Tok, II, *this, false, + [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo( Tok, *this, diag::err_feature_check_malformed); return II && isTargetVendor(getTargetInfo(), II); }); } else if (II == Ident__is_target_os) { EvaluateFeatureLikeBuiltinMacro( - OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + OS, Tok, II, *this, false, + [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo( Tok, *this, diag::err_feature_check_malformed); return II && isTargetOS(getTargetInfo(), II); }); } else if (II == Ident__is_target_environment) { EvaluateFeatureLikeBuiltinMacro( - OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int { + OS, Tok, II, *this, false, + [this](Token &Tok, bool &HasLexedNextToken) -> int { IdentifierInfo *II = ExpectFeatureIdentifierInfo( Tok, *this, diag::err_feature_check_malformed); return II && isTargetEnvironment(getTargetInfo(), II); diff --git a/clang/test/Preprocessor/c2x.c b/clang/test/Preprocessor/c2x.c new file mode 100644 
index 00000000000000..96fc9273a28685 --- /dev/null +++ b/clang/test/Preprocessor/c2x.c @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c2x %s +// expected-no-diagnostics + +// FIXME: Test the correct value once C23 ships. +_Static_assert(__STDC_VERSION__ > 201710L, "Incorrect __STDC_VERSION__"); diff --git a/clang/test/Preprocessor/has_attribute.c b/clang/test/Preprocessor/has_attribute.c index 4970dc5904230a..eef168e8791032 100644 --- a/clang/test/Preprocessor/has_attribute.c +++ b/clang/test/Preprocessor/has_attribute.c @@ -56,3 +56,11 @@ int has_no_volatile_attribute(); #if __has_cpp_attribute(selectany) // expected-error {{function-like macro '__has_cpp_attribute' is not defined}} #endif + +// Test that macro expansion of the builtin argument works. +#define F fallthrough + +#if __has_attribute(F) +int has_fallthrough; +#endif +// CHECK: int has_fallthrough; diff --git a/clang/test/Preprocessor/has_attribute.cpp b/clang/test/Preprocessor/has_attribute.cpp index fe7d29f15de1af..bf0f9b3bc4a8f0 100644 --- a/clang/test/Preprocessor/has_attribute.cpp +++ b/clang/test/Preprocessor/has_attribute.cpp @@ -18,16 +18,6 @@ CXX11(clang::__fallthrough__) // CHECK: __gsl__::suppress: 0 CXX11(__gsl__::suppress) -// We do somewhat support the __clang__ vendor namespace, but it is a -// predefined macro and thus we encourage users to use _Clang instead. -// Because of this, we do not support __has_cpp_attribute for that -// vendor namespace. -// -// Note, we can't use CXX11 here because it will expand __clang__ to 1 -// too early. -// CHECK: 1::fallthrough: 0 -__clang__::fallthrough: __has_cpp_attribute(__clang__::fallthrough) - // CHECK: _Clang::fallthrough: 201603L CXX11(_Clang::fallthrough) @@ -70,6 +60,50 @@ CXX11(unlikely) // CHECK: noreturn: 200809L // CHECK: unlikely: 201803L +namespace PR48462 { +// Test that macro expansion of the builtin argument works. 
+#define C clang +#define F fallthrough +#define CF clang::fallthrough + +#if __has_cpp_attribute(F) +int has_fallthrough; +#endif +// CHECK: int has_fallthrough; + +#if __has_cpp_attribute(C::F) +int has_clang_falthrough_1; +#endif +// CHECK: int has_clang_falthrough_1; + +#if __has_cpp_attribute(clang::F) +int has_clang_falthrough_2; +#endif +// CHECK: int has_clang_falthrough_2; + +#if __has_cpp_attribute(C::fallthrough) +int has_clang_falthrough_3; +#endif +// CHECK: int has_clang_falthrough_3; + +#if __has_cpp_attribute(CF) +int has_clang_falthrough_4; +#endif +// CHECK: int has_clang_falthrough_4; + +#define FUNCLIKE1(x) clang::x +#if __has_cpp_attribute(FUNCLIKE1(fallthrough)) +int funclike_1; +#endif +// CHECK: int funclike_1; + +#define FUNCLIKE2(x) _Clang::x +#if __has_cpp_attribute(FUNCLIKE2(fallthrough)) +int funclike_2; +#endif +// CHECK: int funclike_2; +} + // Test for Microsoft __declspec attributes #define DECLSPEC(x) x: __has_declspec_attribute(x) @@ -81,3 +115,13 @@ DECLSPEC(__uuid__) // CHECK: fallthrough: 0 DECLSPEC(fallthrough) + +namespace PR48462 { +// Test that macro expansion of the builtin argument works. +#define U uuid + +#if __has_declspec_attribute(U) +int has_uuid; +#endif +// CHECK: int has_uuid; +} diff --git a/clang/test/Preprocessor/has_attribute_errors.cpp b/clang/test/Preprocessor/has_attribute_errors.cpp new file mode 100644 index 00000000000000..1fc88d3f926fbd --- /dev/null +++ b/clang/test/Preprocessor/has_attribute_errors.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple i386-unknown-unknown -Eonly -verify %s + +// We warn users if they write an attribute like +// [[__clang__::fallthrough]] because __clang__ is a macro that expands to 1. +// Instead, we suggest users use [[_Clang::fallthrough]] in this situation. +// However, because __has_cpp_attribute (and __has_c_attribute) require +// expanding their argument tokens, __clang__ expands to 1 in the feature test +// macro as well. 
We don't currently give users a kind warning in this case, +// but we previously did not expand macros and so this would return 0. Now that +// we properly expand macros, users will now get an error about using incorrect +// syntax. + +__has_cpp_attribute(__clang__::fallthrough) // expected-error {{missing ')' after }} \ + // expected-note {{to match this '('}} \ + // expected-error {{builtin feature check macro requires a parenthesized identifier}} + diff --git a/clang/test/Preprocessor/has_c_attribute.c b/clang/test/Preprocessor/has_c_attribute.c index 670e42a97926ef..36dd1c80e7802e 100644 --- a/clang/test/Preprocessor/has_c_attribute.c +++ b/clang/test/Preprocessor/has_c_attribute.c @@ -33,12 +33,45 @@ C2x(__gnu__::warn_unused_result) // CHECK: gnu::__warn_unused_result__: 201904L C2x(gnu::__warn_unused_result__) -// We do somewhat support the __clang__ vendor namespace, but it is a -// predefined macro and thus we encourage users to use _Clang instead. -// Because of this, we do not support __has_c_attribute for that -// vendor namespace. -// -// Note, we can't use C2x here because it will expand __clang__ to 1 -// too early. -// CHECK: 1::fallthrough: 0 -__clang__::fallthrough: __has_c_attribute(__clang__::fallthrough) +// Test that macro expansion of the builtin argument works. 
+#define C clang +#define L likely +#define CL clang::likely +#define N nodiscard + +#if __has_c_attribute(N) +int has_nodiscard; +#endif +// CHECK: int has_nodiscard; + +#if __has_c_attribute(C::L) +int has_clang_likely_1; +#endif +// CHECK: int has_clang_likely_1; + +#if __has_c_attribute(clang::L) +int has_clang_likely_2; +#endif +// CHECK: int has_clang_likely_2; + +#if __has_c_attribute(C::likely) +int has_clang_likely_3; +#endif +// CHECK: int has_clang_likely_3; + +#if __has_c_attribute(CL) +int has_clang_likely_4; +#endif +// CHECK: int has_clang_likely_4; + +#define FUNCLIKE1(x) clang::x +#if __has_c_attribute(FUNCLIKE1(likely)) +int funclike_1; +#endif +// CHECK: int funclike_1; + +#define FUNCLIKE2(x) _Clang::x +#if __has_c_attribute(FUNCLIKE2(likely)) +int funclike_2; +#endif +// CHECK: int funclike_2; diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp index 0863850e4f1145..c2e852d941c04a 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp @@ -16,108 +16,134 @@ #include "gtest/gtest.h" #include "tsan_rtl.h" +#if SANITIZER_MAC || !defined(__x86_64__) +// These tests are currently crashing on Mac: +// https://reviews.llvm.org/D107911 +// and on ppc64: https://reviews.llvm.org/D110546#3025422 +// due to the way we create thread contexts +// (but they crashed on Mac with normal pthread_create as well). +// There must be some difference in thread initialization +// between normal execution and unit tests. +# define TRACE_TEST(SUITE, NAME) TEST(SUITE, DISABLED_##NAME) +#else +# define TRACE_TEST(SUITE, NAME) TEST(SUITE, NAME) +#endif + namespace __tsan { using namespace v3; // We need to run all trace tests in a new thread, // so that the thread trace is empty initially. 
-static void run_in_thread(void *(*f)(void *), void *arg = nullptr) { - pthread_t th; - pthread_create(&th, nullptr, f, arg); - pthread_join(th, nullptr); -} - -#if SANITIZER_MAC -// These tests are currently failing on Mac. -// See https://reviews.llvm.org/D107911 for more details. -# define MAYBE_RestoreAccess DISABLED_RestoreAccess -# define MAYBE_MemoryAccessSize DISABLED_MemoryAccessSize -# define MAYBE_RestoreMutexLock DISABLED_RestoreMutexLock -# define MAYBE_MultiPart DISABLED_MultiPart -#else -# define MAYBE_RestoreAccess RestoreAccess -# define MAYBE_MemoryAccessSize MemoryAccessSize -# define MAYBE_RestoreMutexLock RestoreMutexLock -# define MAYBE_MultiPart MultiPart -#endif +template +struct ThreadArray { + ThreadArray() { + for (auto *&thr : threads) { + thr = static_cast( + MmapOrDie(sizeof(ThreadState), "ThreadState")); + Tid tid = ThreadCreate(cur_thread(), 0, 0, true); + Processor *proc = ProcCreate(); + ProcWire(proc, thr); + ThreadStart(thr, tid, 0, ThreadType::Fiber); + } + } -TEST(Trace, MAYBE_RestoreAccess) { - struct Thread { - static void *Func(void *arg) { - // A basic test with some function entry/exit events, - // some mutex lock/unlock events and some other distracting - // memory events. - ThreadState *thr = cur_thread(); - TraceFunc(thr, 0x1000); - TraceFunc(thr, 0x1001); - TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001); - TraceMutexUnlock(thr, 0x5000); - TraceFunc(thr); - CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead)); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002); - TraceFunc(thr, 0x1002); - CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead)); - // This is the access we want to find. - // The previous one is equivalent, but RestoreStack must prefer - // the last of the matchig accesses. 
- CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); - Lock lock1(&ctx->slot_mtx); - ThreadRegistryLock lock2(&ctx->thread_registry); - VarSizeStackTrace stk; - MutexSet mset; - uptr tag = kExternalTagNone; - bool res = - RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, - thr->epoch, 0x3000, 8, kAccessRead, &stk, &mset, &tag); - CHECK(res); - CHECK_EQ(stk.size, 3); - CHECK_EQ(stk.trace[0], 0x1000); - CHECK_EQ(stk.trace[1], 0x1002); - CHECK_EQ(stk.trace[2], 0x2002); - CHECK_EQ(mset.Size(), 2); - CHECK_EQ(mset.Get(0).addr, 0x5001); - CHECK_EQ(mset.Get(0).stack_id, 0x6001); - CHECK_EQ(mset.Get(0).write, true); - CHECK_EQ(mset.Get(1).addr, 0x5002); - CHECK_EQ(mset.Get(1).stack_id, 0x6002); - CHECK_EQ(mset.Get(1).write, false); - CHECK_EQ(tag, kExternalTagNone); - return nullptr; + ~ThreadArray() { + for (uptr i = 0; i < N; i++) { + if (threads[i]) + Finish(i); } - }; - run_in_thread(Thread::Func); + } + + void Finish(uptr i) { + auto *thr = threads[i]; + threads[i] = nullptr; + Processor *proc = thr->proc(); + ThreadFinish(thr); + ProcUnwire(proc, thr); + ProcDestroy(proc); + UnmapOrDie(thr, sizeof(ThreadState)); + } + + ThreadState *threads[N]; + ThreadState *operator[](uptr i) { return threads[i]; } + ThreadState *operator->() { return threads[0]; } + operator ThreadState *() { return threads[0]; } +}; + +TRACE_TEST(Trace, RestoreAccess) { + // A basic test with some function entry/exit events, + // some mutex lock/unlock events and some other distracting + // memory events. 
+ ThreadArray<1> thr; + TraceFunc(thr, 0x1000); + TraceFunc(thr, 0x1001); + TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); + TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001); + TraceMutexUnlock(thr, 0x5000); + TraceFunc(thr); + CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead)); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002); + TraceFunc(thr, 0x1002); + CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead)); + // This is the access we want to find. + // The previous one is equivalent, but RestoreStack must prefer + // the last of the matching accesses. + CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); + Lock lock1(&ctx->slot_mtx); + ThreadRegistryLock lock2(&ctx->thread_registry); + VarSizeStackTrace stk; + MutexSet mset; + uptr tag = kExternalTagNone; + bool res = + RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch, + 0x3000, 8, kAccessRead, &stk, &mset, &tag); + CHECK(res); + CHECK_EQ(stk.size, 3); + CHECK_EQ(stk.trace[0], 0x1000); + CHECK_EQ(stk.trace[1], 0x1002); + CHECK_EQ(stk.trace[2], 0x2002); + CHECK_EQ(mset.Size(), 2); + CHECK_EQ(mset.Get(0).addr, 0x5001); + CHECK_EQ(mset.Get(0).stack_id, 0x6001); + CHECK_EQ(mset.Get(0).write, true); + CHECK_EQ(mset.Get(1).addr, 0x5002); + CHECK_EQ(mset.Get(1).stack_id, 0x6002); + CHECK_EQ(mset.Get(1).write, false); + CHECK_EQ(tag, kExternalTagNone); } -TEST(Trace, MAYBE_MemoryAccessSize) { - struct Thread { - struct Params { - uptr access_size, offset, size; - bool res; - int type; - }; - static void *Func(void *arg) { - // Test tracing and matching of accesses of different sizes. - const Params *params = static_cast(arg); +TRACE_TEST(Trace, MemoryAccessSize) { + // Test tracing and matching of accesses of different sizes. 
+ struct Params { + uptr access_size, offset, size; + bool res; + }; + Params tests[] = { + {1, 0, 1, true}, {4, 0, 2, true}, + {4, 2, 2, true}, {8, 3, 1, true}, + {2, 1, 1, true}, {1, 1, 1, false}, + {8, 5, 4, false}, {4, static_cast(-1l), 4, false}, + }; + for (auto params : tests) { + for (int type = 0; type < 3; type++) { + ThreadArray<1> thr; Printf("access_size=%zu, offset=%zu, size=%zu, res=%d, type=%d\n", - params->access_size, params->offset, params->size, params->res, - params->type); - ThreadState *thr = cur_thread(); + params.access_size, params.offset, params.size, params.res, type); TraceFunc(thr, 0x1000); - switch (params->type) { + switch (type) { case 0: // This should emit compressed event. - CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, params->access_size, + CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, params.access_size, kAccessRead)); break; case 1: // This should emit full event. - CHECK(TryTraceMemoryAccess(thr, 0x2000000, 0x3000, - params->access_size, kAccessRead)); + CHECK(TryTraceMemoryAccess(thr, 0x2000000, 0x3000, params.access_size, + kAccessRead)); break; case 2: - TraceMemoryAccessRange(thr, 0x2000000, 0x3000, params->access_size, + TraceMemoryAccessRange(thr, 0x2000000, 0x3000, params.access_size, kAccessRead); break; } @@ -127,105 +153,82 @@ TEST(Trace, MAYBE_MemoryAccessSize) { MutexSet mset; uptr tag = kExternalTagNone; bool res = RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, - thr->epoch, 0x3000 + params->offset, params->size, + thr->epoch, 0x3000 + params.offset, params.size, kAccessRead, &stk, &mset, &tag); - CHECK_EQ(res, params->res); - if (params->res) { + CHECK_EQ(res, params.res); + if (params.res) { CHECK_EQ(stk.size, 2); CHECK_EQ(stk.trace[0], 0x1000); - CHECK_EQ(stk.trace[1], params->type ? 0x2000000 : 0x2000); + CHECK_EQ(stk.trace[1], type ? 
0x2000000 : 0x2000); } - return nullptr; } - }; - Thread::Params tests[] = { - {1, 0, 1, true, 0}, {4, 0, 2, true, 0}, - {4, 2, 2, true, 0}, {8, 3, 1, true, 0}, - {2, 1, 1, true, 0}, {1, 1, 1, false, 0}, - {8, 5, 4, false, 0}, {4, static_cast(-1l), 4, false, 0}, - }; - for (auto params : tests) { - for (params.type = 0; params.type < 3; params.type++) - run_in_thread(Thread::Func, ¶ms); } } -TEST(Trace, MAYBE_RestoreMutexLock) { - struct Thread { - static void *Func(void *arg) { - // Check of restoration of a mutex lock event. - ThreadState *thr = cur_thread(); - TraceFunc(thr, 0x1000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002); - Lock lock1(&ctx->slot_mtx); - ThreadRegistryLock lock2(&ctx->thread_registry); - VarSizeStackTrace stk; - MutexSet mset; - uptr tag = kExternalTagNone; - bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid, - thr->epoch, 0x5001, 0, 0, &stk, &mset, &tag); - CHECK(res); - CHECK_EQ(stk.size, 2); - CHECK_EQ(stk.trace[0], 0x1000); - CHECK_EQ(stk.trace[1], 0x4002); - CHECK_EQ(mset.Size(), 2); - CHECK_EQ(mset.Get(0).addr, 0x5000); - CHECK_EQ(mset.Get(0).stack_id, 0x6000); - CHECK_EQ(mset.Get(0).write, true); - CHECK_EQ(mset.Get(1).addr, 0x5001); - CHECK_EQ(mset.Get(1).stack_id, 0x6001); - CHECK_EQ(mset.Get(1).write, false); - return nullptr; - } - }; - run_in_thread(Thread::Func); +TRACE_TEST(Trace, RestoreMutexLock) { + // Check of restoration of a mutex lock event. 
+ ThreadArray<1> thr; + TraceFunc(thr, 0x1000); + TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002); + Lock lock1(&ctx->slot_mtx); + ThreadRegistryLock lock2(&ctx->thread_registry); + VarSizeStackTrace stk; + MutexSet mset; + uptr tag = kExternalTagNone; + bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid, thr->epoch, + 0x5001, 0, 0, &stk, &mset, &tag); + CHECK(res); + CHECK_EQ(stk.size, 2); + CHECK_EQ(stk.trace[0], 0x1000); + CHECK_EQ(stk.trace[1], 0x4002); + CHECK_EQ(mset.Size(), 2); + CHECK_EQ(mset.Get(0).addr, 0x5000); + CHECK_EQ(mset.Get(0).stack_id, 0x6000); + CHECK_EQ(mset.Get(0).write, true); + CHECK_EQ(mset.Get(1).addr, 0x5001); + CHECK_EQ(mset.Get(1).stack_id, 0x6001); + CHECK_EQ(mset.Get(1).write, false); } -TEST(Trace, MAYBE_MultiPart) { - struct Thread { - static void *Func(void *arg) { - // Check replay of a trace with multiple parts. 
- ThreadState *thr = cur_thread(); - TraceFunc(thr, 0x1000); - TraceFunc(thr, 0x2000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); - const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event); - for (uptr i = 0; i < kEvents; i++) { - TraceFunc(thr, 0x3000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002); - TraceMutexUnlock(thr, 0x5002); - TraceFunc(thr); - } - TraceFunc(thr, 0x4000); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); - CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); - Lock lock1(&ctx->slot_mtx); - ThreadRegistryLock lock2(&ctx->thread_registry); - VarSizeStackTrace stk; - MutexSet mset; - uptr tag = kExternalTagNone; - bool res = - RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, - thr->epoch, 0x3000, 8, kAccessRead, &stk, &mset, &tag); - CHECK(res); - CHECK_EQ(stk.size, 4); - CHECK_EQ(stk.trace[0], 0x1000); - CHECK_EQ(stk.trace[1], 0x2000); - CHECK_EQ(stk.trace[2], 0x4000); - CHECK_EQ(stk.trace[3], 0x2002); - CHECK_EQ(mset.Size(), 2); - CHECK_EQ(mset.Get(0).addr, 0x5000); - CHECK_EQ(mset.Get(0).stack_id, 0x6000); - CHECK_EQ(mset.Get(0).write, true); - CHECK_EQ(mset.Get(1).addr, 0x5001); - CHECK_EQ(mset.Get(1).stack_id, 0x6001); - CHECK_EQ(mset.Get(1).write, false); - return nullptr; - } - }; - run_in_thread(Thread::Func); +TRACE_TEST(Trace, MultiPart) { + // Check replay of a trace with multiple parts. 
+ ThreadArray<1> thr; + TraceFunc(thr, 0x1000); + TraceFunc(thr, 0x2000); + TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); + const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event); + for (uptr i = 0; i < kEvents; i++) { + TraceFunc(thr, 0x3000); + TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002); + TraceMutexUnlock(thr, 0x5002); + TraceFunc(thr); + } + TraceFunc(thr, 0x4000); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); + CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); + Lock lock1(&ctx->slot_mtx); + ThreadRegistryLock lock2(&ctx->thread_registry); + VarSizeStackTrace stk; + MutexSet mset; + uptr tag = kExternalTagNone; + bool res = + RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch, + 0x3000, 8, kAccessRead, &stk, &mset, &tag); + CHECK(res); + CHECK_EQ(stk.size, 4); + CHECK_EQ(stk.trace[0], 0x1000); + CHECK_EQ(stk.trace[1], 0x2000); + CHECK_EQ(stk.trace[2], 0x4000); + CHECK_EQ(stk.trace[3], 0x2002); + CHECK_EQ(mset.Size(), 2); + CHECK_EQ(mset.Get(0).addr, 0x5000); + CHECK_EQ(mset.Get(0).stack_id, 0x6000); + CHECK_EQ(mset.Get(0).write, true); + CHECK_EQ(mset.Get(1).addr, 0x5001); + CHECK_EQ(mset.Get(1).stack_id, 0x6001); + CHECK_EQ(mset.Get(1).write, false); } } // namespace __tsan diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 88f5740765de6a..5e17f7a3b977dc 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -837,6 +837,42 @@ void OmpStructureChecker::Leave(const parser::OmpEndSectionsDirective &x) { } } +void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar( + const parser::OmpObjectList &objList) { + for (const auto &ompObject : objList.v) { + std::visit( + common::visitors{ + [&](const parser::Designator &) { + if (const auto *name{parser::Unwrap(ompObject)}) { + const auto &scope{context_.FindScope(name->symbol->name())}; + 
if (FindCommonBlockContaining(*name->symbol)) { + context_.Say(name->source, + "A variable in a %s directive cannot be an element of a " + "common block"_err_en_US, + ContextDirectiveAsFortran()); + } else if (!IsSave(*name->symbol) && + scope.kind() != Scope::Kind::MainProgram && + scope.kind() != Scope::Kind::Module) { + context_.Say(name->source, + "A variable that appears in a %s directive must be " + "declared in the scope of a module or have the SAVE " + "attribute, either explicitly or implicitly"_err_en_US, + ContextDirectiveAsFortran()); + } + if (FindEquivalenceSet(*name->symbol)) { + context_.Say(name->source, + "A variable in a %s directive cannot appear in an " + "EQUIVALENCE statement"_err_en_US, + ContextDirectiveAsFortran()); + } + } + }, + [&](const parser::Name &) {}, // common block + }, + ompObject.u); + } +} + void OmpStructureChecker::Enter(const parser::OpenMPThreadprivate &c) { const auto &dir{std::get(c.t)}; PushContextAndClauseSets( @@ -847,6 +883,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPThreadprivate &c) { const auto &dir{std::get(c.t)}; const auto &objectList{std::get(c.t)}; CheckIsVarPartOfAnotherVar(dir.source, objectList); + CheckThreadprivateOrDeclareTargetVar(objectList); dirContext_.pop_back(); } @@ -892,7 +929,25 @@ void OmpStructureChecker::Enter(const parser::OpenMPDeclareTargetConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &) { +void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) { + const auto &dir{std::get(x.t)}; + const auto &spec{std::get(x.t)}; + if (const auto *objectList{parser::Unwrap(spec.u)}) { + CheckIsVarPartOfAnotherVar(dir.source, *objectList); + CheckThreadprivateOrDeclareTargetVar(*objectList); + } else if (const auto *clauseList{ + parser::Unwrap(spec.u)}) { + for (const auto &clause : clauseList->v) { + if (const auto *toClause{std::get_if(&clause.u)}) { + CheckIsVarPartOfAnotherVar(dir.source, toClause->v); + 
CheckThreadprivateOrDeclareTargetVar(toClause->v); + } else if (const auto *linkClause{ + std::get_if(&clause.u)}) { + CheckIsVarPartOfAnotherVar(dir.source, linkClause->v); + CheckThreadprivateOrDeclareTargetVar(linkClause->v); + } + } + } dirContext_.pop_back(); } @@ -1635,7 +1690,8 @@ bool OmpStructureChecker::IsDataRefTypeParamInquiry( void OmpStructureChecker::CheckIsVarPartOfAnotherVar( const parser::CharBlock &source, const parser::OmpObjectList &objList) { OmpDirectiveSet nonPartialVarSet{llvm::omp::Directive::OMPD_allocate, - llvm::omp::Directive::OMPD_threadprivate}; + llvm::omp::Directive::OMPD_threadprivate, + llvm::omp::Directive::OMPD_declare_target}; for (const auto &ompObject : objList.v) { std::visit( common::visitors{ diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index d82bf0032a2439..bf98f360ed58b4 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -211,6 +211,8 @@ class OmpStructureChecker bool IsDataRefTypeParamInquiry(const parser::DataRef *dataRef); void CheckIsVarPartOfAnotherVar( const parser::CharBlock &source, const parser::OmpObjectList &objList); + void CheckThreadprivateOrDeclareTargetVar( + const parser::OmpObjectList &objList); void CheckIntentInPointer( const parser::OmpObjectList &, const llvm::omp::Clause); void GetSymbolsInObjectList(const parser::OmpObjectList &, SymbolSourceMap &); diff --git a/flang/test/Semantics/omp-declarative-directive.f90 b/flang/test/Semantics/omp-declarative-directive.f90 index 15744d9797935e..b9b39a30968745 100644 --- a/flang/test/Semantics/omp-declarative-directive.f90 +++ b/flang/test/Semantics/omp-declarative-directive.f90 @@ -44,13 +44,20 @@ module m2 contains subroutine foo !$omp declare target + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !ERROR: A variable that appears 
in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target (foo, N, M) + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to(Q, S) link(R) !ERROR: MAP clause is not allowed on the DECLARE TARGET directive !$omp declare target map(from:Q) integer, parameter :: N=10000, M=1024 integer :: i real :: Q(N, N), R(N,M), S(M,M) + !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp threadprivate(i) end subroutine foo end module m2 diff --git a/flang/test/Semantics/omp-declare-target01.f90 b/flang/test/Semantics/omp-declare-target01.f90 new file mode 100644 index 00000000000000..972468fd9d0655 --- /dev/null +++ b/flang/test/Semantics/omp-declare-target01.f90 @@ -0,0 +1,113 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 
2.14.7 Declare Target Directive + +module declare_target01 + use omp_lib + type my_type(kind_param, len_param) + integer, KIND :: kind_param + integer, LEN :: len_param + integer :: t_i + integer :: t_arr(10) + end type my_type + + type(my_type(2, 4)) :: my_var, my_var2 + integer :: arr(10), arr2(10) + integer(kind=4) :: x, x2 + character(len=32) :: w, w2 + integer, dimension(:), allocatable :: y, y2 + + !$omp declare target (my_var) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target (my_var%t_i) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target (my_var%t_arr) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target (my_var%kind_param) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target (my_var%len_param) + + !$omp declare target (arr) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target (arr(1)) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target (arr(1:2)) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target (x%KIND) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target (w%LEN) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target (y%KIND) + + !$omp declare target to (my_var) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target to (my_var%t_i) + + !ERROR: A variable that is 
part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target to (my_var%t_arr) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target to (my_var%kind_param) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target to (my_var%len_param) + + !$omp declare target to (arr) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target to (arr(1)) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target to (arr(1:2)) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target to (x%KIND) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target to (w%LEN) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target to (y%KIND) + + !$omp declare target link (my_var2) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target link (my_var2%t_i) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target link (my_var2%t_arr) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target link (my_var2%kind_param) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target link (my_var2%len_param) + + !$omp declare target link (arr2) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target 
link (arr2(1)) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target link (arr2(1:2)) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target link (x2%KIND) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target link (w2%LEN) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target link (y2%KIND) +end diff --git a/flang/test/Semantics/omp-declare-target02.f90 b/flang/test/Semantics/omp-declare-target02.f90 new file mode 100644 index 00000000000000..2ef5df51c6727b --- /dev/null +++ b/flang/test/Semantics/omp-declare-target02.f90 @@ -0,0 +1,176 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 2.14.7 Declare Target Directive + +program declare_target02 + integer :: arr1(10), arr1_to(10), arr1_link(10) + common /blk1/ a1, a1_to, a1_link + real, save :: eq_a, eq_b, eq_c, eq_d + + + !$omp declare target (arr1) + + !$omp declare target (blk1) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target (a1) + + !$omp declare target to (arr1_to) + + !$omp declare target to (blk1_to) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target to (a1_to) + + !$omp declare target link (arr1_link) + + !$omp declare target link (blk1_link) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target link (a1_link) + + equivalence(eq_a, eq_b) + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp declare target (eq_a) + + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp 
declare target to (eq_a) + + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp declare target link (eq_b) + + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp declare target (eq_c) + + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp declare target to (eq_c) + + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp declare target link (eq_d) + equivalence(eq_c, eq_d) + +contains + subroutine func() + integer :: arr2(10), arr2_to(10), arr2_link(10) + integer, save :: arr3(10), arr3_to(10), arr3_link(10) + common /blk2/ a2, a2_to, a2_link + common /blk3/ a3, a3_to, a3_link + save /blk3/ + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target (arr2) + + !$omp declare target (arr3) + + !ERROR: Implicitly typed local entity 'blk2' not allowed in specification expression + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target (blk2) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target (a2) + + !ERROR: Implicitly typed local entity 'blk3' not allowed in specification expression + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target (blk3) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target (a3) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + 
!$omp declare target to (arr2_to) + + !$omp declare target to (arr3_to) + + !ERROR: Implicitly typed local entity 'blk2_to' not allowed in specification expression + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target to (blk2_to) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target to (a2_to) + + !ERROR: Implicitly typed local entity 'blk3_to' not allowed in specification expression + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target to (blk3_to) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target to (a3_to) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target link (arr2_link) + + !$omp declare target link (arr3_link) + + !ERROR: Implicitly typed local entity 'blk2_link' not allowed in specification expression + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target link (blk2_link) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target link (a2_link) + + !ERROR: Implicitly typed local entity 'blk3_link' not allowed in specification expression + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target link (blk3_link) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare 
target link (a3_link) + end +end + +module mod4 + integer :: arr4(10), arr4_to(10), arr4_link(10) + common /blk4/ a4, a4_to, a4_link + + !$omp declare target (arr4) + + !$omp declare target (blk4) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target (a4) + + !$omp declare target to (arr4_to) + + !$omp declare target to (blk4_to) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target to (a4_to) + + !$omp declare target link (arr4_link) + + !$omp declare target link (blk4_link) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target link (a4_link) +end + +subroutine func5() + integer :: arr5(10), arr5_to(10), arr5_link(10) + common /blk5/ a5, a5_to, a5_link + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target (arr5) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target (blk5) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target (a5) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target to (arr5_to) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target to (blk5_to) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target to (a5_to) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a 
module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target link (arr5_link) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target link (blk5_link) + + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target link (a5_link) +end diff --git a/flang/test/Semantics/omp-threadprivate02.f90 b/flang/test/Semantics/omp-threadprivate02.f90 new file mode 100644 index 00000000000000..4a4034908f40ae --- /dev/null +++ b/flang/test/Semantics/omp-threadprivate02.f90 @@ -0,0 +1,89 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp +! OpenMP Version 5.1 +! Check OpenMP construct validity for the following directives: +! 2.21.2 Threadprivate Directive + +program threadprivate02 + integer :: arr1(10) + common /blk1/ a1 + real, save :: eq_a, eq_b, eq_c, eq_d + + !$omp threadprivate(arr1) + + !$omp threadprivate(/blk1/) + + !$omp threadprivate(blk1) + + !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block + !$omp threadprivate(a1) + + equivalence(eq_a, eq_b) + !ERROR: A variable in a THREADPRIVATE directive cannot appear in an EQUIVALENCE statement + !$omp threadprivate(eq_a) + + !ERROR: A variable in a THREADPRIVATE directive cannot appear in an EQUIVALENCE statement + !$omp threadprivate(eq_c) + equivalence(eq_c, eq_d) + +contains + subroutine func() + integer :: arr2(10) + integer, save :: arr3(10) + common /blk2/ a2 + common /blk3/ a3 + save /blk3/ + + !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp threadprivate(arr2) + + !$omp threadprivate(arr3) + + !$omp threadprivate(/blk2/) + + !ERROR: Implicitly typed local entity 'blk2' not allowed in specification expression + !ERROR: A variable that appears 
in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp threadprivate(blk2) + + !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block + !$omp threadprivate(a2) + + !$omp threadprivate(/blk3/) + + !ERROR: Implicitly typed local entity 'blk3' not allowed in specification expression + !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp threadprivate(blk3) + + !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block + !$omp threadprivate(a3) + end +end + +module mod4 + integer :: arr4(10) + common /blk4/ a4 + + !$omp threadprivate(arr4) + + !$omp threadprivate(/blk4/) + + !$omp threadprivate(blk4) + + !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block + !$omp threadprivate(a4) +end + +subroutine func5() + integer :: arr5(10) + common /blk5/ a5 + + !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp threadprivate(arr5) + + !$omp threadprivate(/blk5/) + + !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp threadprivate(blk5) + + !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block + !$omp threadprivate(a5) +end diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h index 44b8c395c89e2e..e464d29afdacac 100644 --- a/llvm/include/llvm/IR/ConstantRange.h +++ b/llvm/include/llvm/IR/ConstantRange.h @@ -383,6 +383,11 @@ class LLVM_NODISCARD ConstantRange { /// treating both this and \p Other as unsigned ranges. 
ConstantRange multiply(const ConstantRange &Other) const; + /// Return range of possible values for a signed multiplication of this and + /// \p Other. However, if overflow is possible always return a full range + /// rather than trying to determine a more precise result. + ConstantRange smul_fast(const ConstantRange &Other) const; + /// Return a new range representing the possible values resulting /// from a signed maximum of a value in this range and a value in \p Other. ConstantRange smax(const ConstantRange &Other) const; diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3129da27053f29..865db9f798326a 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1302,7 +1302,7 @@ AliasResult BasicAAResult::aliasGEP( computeConstantRange(Var.Val.V, true, &AC, Var.CxtI)); if (!R.isFullSet() && !R.isEmptySet()) VarIndexRange = R.sextOrTrunc(Var.Scale.getBitWidth()) - .multiply(ConstantRange(Var.Scale)); + .smul_fast(ConstantRange(Var.Scale)); } else if (DecompGEP1.VarIndices.size() == 2) { // VarIndex = Scale*V0 + (-Scale)*V1. // If V0 != V1 then abs(VarIndex) >= abs(Scale). diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index d8b4262a811425..6877a5d278ac54 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -1054,6 +1054,25 @@ ConstantRange::multiply(const ConstantRange &Other) const { return UR.isSizeStrictlySmallerThan(SR) ? 
UR : SR; } +ConstantRange ConstantRange::smul_fast(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return getEmpty(); + + APInt Min = getSignedMin(); + APInt Max = getSignedMax(); + APInt OtherMin = Other.getSignedMin(); + APInt OtherMax = Other.getSignedMax(); + + bool O1, O2, O3, O4; + auto Muls = {Min.smul_ov(OtherMin, O1), Min.smul_ov(OtherMax, O2), + Max.smul_ov(OtherMin, O3), Max.smul_ov(OtherMax, O4)}; + if (O1 || O2 || O3 || O4) + return getFull(); + + auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); }; + return getNonEmpty(std::min(Muls, Compare), std::max(Muls, Compare) + 1); +} + ConstantRange ConstantRange::smax(const ConstantRange &Other) const { // X smax Y is: range(smax(X_smin, Y_smin), diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 33ce7df2bddade..ecd89b64b8ea35 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1216,6 +1216,18 @@ def fpimm0 : FPImmLeaf; +def fpimm_half : FPImmLeaf; + +def fpimm_one : FPImmLeaf; + +def fpimm_two : FPImmLeaf; + def gi_fpimm16 : GICustomOperandRenderer<"renderFPImm16">, GISDNodeXFormEquiv; def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 7bd891a2acdc5b..cb83f787a59845 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -405,14 +405,34 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>; defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>; - defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>; - defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>; - defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", 
sve_fpimm_half_two>; - defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>; - defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>; - defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>; - defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>; - defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>; + defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", "FADD_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>; + defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", "FSUB_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>; + defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", "FMUL_ZPZI", sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>; + defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", "FSUBR_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>; + defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>; + defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>; + defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>; + defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>; + + defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd; + defm FMIN_ZPZI : sve_fp_2op_i_p_zds_hfd; + + let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { + defm FADD_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; + defm FSUB_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; + defm FMUL_ZPZI : 
sve_fp_2op_i_p_zds_zeroing_hfd; + defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; + defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; + defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; + defm FMAX_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; + defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd; + } defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>; defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 490e08a89471cc..fcb96c3b19db95 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -490,6 +490,21 @@ class SVE_Shift_DupImm_All_Active_Pat; +class SVE_2_Op_Fp_Imm_Pat +: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))), + (inst $Pg, $Zs1, imm)>; + +class SVE_2_Op_Fp_Imm_Pat_Zero +: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)), + (vt (AArch64dup (it immL))))), + (inst $Pg, $Zs1, imm)>; + // // Pseudo -> Instruction mappings // @@ -1745,10 +1760,19 @@ class sve_fp_2op_i_p_zds sz, bits<3> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_fp_2op_i_p_zds opc, string asm, Operand imm_ty> { - def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; - def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; - def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; +multiclass sve_fp_2op_i_p_zds opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> { + let DestructiveInstType = DestructiveBinaryImm in { + def _H : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; + def _S : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; + def _D : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; + } + + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H")>; 
+ def : SVE_2_Op_Fp_Imm_Pat(NAME # "_H")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_S")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_D")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_D")>; } class sve_fp_2op_p_zds sz, bits<4> opc, string asm, @@ -1846,6 +1870,40 @@ multiclass sve_fp_ftmad { (!cast(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>; } +multiclass sve_fp_2op_i_p_zds_hfd { + def _UNDEF_H : PredTwoOpImmPseudo; + def _UNDEF_S : PredTwoOpImmPseudo; + def _UNDEF_D : PredTwoOpImmPseudo; + + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_H")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_S")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_D")>; + def : SVE_2_Op_Fp_Imm_Pat(NAME # "_UNDEF_D")>; +} + +multiclass sve_fp_2op_i_p_zds_zeroing_hfd { + def _ZERO_H : PredTwoOpImmPseudo; + def _ZERO_S : PredTwoOpImmPseudo; + def _ZERO_D : PredTwoOpImmPseudo; + + let AddedComplexity = 2 in { + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_H")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_H")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_S")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_S")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_D")>; + def : SVE_2_Op_Fp_Imm_Pat_Zero(NAME # "_ZERO_D")>; + } +} + //===----------------------------------------------------------------------===// // SVE Floating Point Arithmetic - Unpredicated Group //===----------------------------------------------------------------------===// @@ -8371,3 +8429,4 @@ multiclass sve_int_bin_pred_all_active_bhsd { def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_S)>; def : 
SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_D)>; } + diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 14b98f3c659081..f45bc81eeba882 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5231,6 +5231,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {2, MVT::v4i64, 4}, // (load 8i64 and) deinterleave into 2 x 4i64 {2, MVT::v8i64, 8}, // (load 16i64 and) deinterleave into 2 x 8i64 {2, MVT::v16i64, 16}, // (load 32i64 and) deinterleave into 2 x 16i64 + {2, MVT::v32i64, 32}, // (load 64i64 and) deinterleave into 2 x 32i64 {3, MVT::v2i8, 3}, // (load 6i8 and) deinterleave into 3 x 2i8 {3, MVT::v4i8, 3}, // (load 12i8 and) deinterleave into 3 x 4i8 @@ -5248,6 +5249,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {3, MVT::v4i32, 3}, // (load 12i32 and) deinterleave into 3 x 4i32 {3, MVT::v8i32, 7}, // (load 24i32 and) deinterleave into 3 x 8i32 {3, MVT::v16i32, 14}, // (load 48i32 and) deinterleave into 3 x 16i32 + {3, MVT::v32i32, 32}, // (load 96i32 and) deinterleave into 3 x 32i32 {3, MVT::v2i64, 1}, // (load 6i64 and) deinterleave into 3 x 2i64 {3, MVT::v4i64, 5}, // (load 12i64 and) deinterleave into 3 x 4i64 @@ -5270,10 +5272,12 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {4, MVT::v4i32, 8}, // (load 16i32 and) deinterleave into 4 x 4i32 {4, MVT::v8i32, 16}, // (load 32i32 and) deinterleave into 4 x 8i32 {4, MVT::v16i32, 32}, // (load 64i32 and) deinterleave into 4 x 16i32 + {4, MVT::v32i32, 68}, // (load 128i32 and) deinterleave into 4 x 32i32 {4, MVT::v2i64, 6}, // (load 8i64 and) deinterleave into 4 x 2i64 {4, MVT::v4i64, 8}, // (load 16i64 and) deinterleave into 4 x 4i64 {4, MVT::v8i64, 20}, // (load 32i64 and) deinterleave into 4 x 8i64 + {4, MVT::v16i64, 40}, // (load 64i64 and) deinterleave into 4 x 16i64 {6, MVT::v2i8, 6}, // (load 12i8 and) deinterleave into 6 x 2i8 {6, MVT::v4i8, 14}, // 
(load 24i8 and) deinterleave into 6 x 4i8 @@ -5285,6 +5289,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {6, MVT::v4i16, 9}, // (load 24i16 and) deinterleave into 6 x 4i16 {6, MVT::v8i16, 39}, // (load 48i16 and) deinterleave into 6 x 8i16 {6, MVT::v16i16, 106}, // (load 96i16 and) deinterleave into 6 x 16i16 + {6, MVT::v32i16, 212}, // (load 192i16 and) deinterleave into 6 x 32i16 {6, MVT::v2i32, 6}, // (load 12i32 and) deinterleave into 6 x 2i32 {6, MVT::v4i32, 15}, // (load 24i32 and) deinterleave into 6 x 4i32 @@ -5335,6 +5340,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {2, MVT::v4i64, 4}, // interleave 2 x 4i64 into 8i64 (and store) {2, MVT::v8i64, 8}, // interleave 2 x 8i64 into 16i64 (and store) {2, MVT::v16i64, 16}, // interleave 2 x 16i64 into 32i64 (and store) + {2, MVT::v32i64, 32}, // interleave 2 x 32i64 into 64i64 (and store) {3, MVT::v2i8, 4}, // interleave 3 x 2i8 into 6i8 (and store) {3, MVT::v4i8, 4}, // interleave 3 x 4i8 into 12i8 (and store) @@ -5352,6 +5358,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {3, MVT::v4i32, 5}, // interleave 3 x 4i32 into 12i32 (and store) {3, MVT::v8i32, 11}, // interleave 3 x 8i32 into 24i32 (and store) {3, MVT::v16i32, 22}, // interleave 3 x 16i32 into 48i32 (and store) + {3, MVT::v32i32, 48}, // interleave 3 x 32i32 into 96i32 (and store) {3, MVT::v2i64, 4}, // interleave 3 x 2i64 into 6i64 (and store) {3, MVT::v4i64, 6}, // interleave 3 x 4i64 into 12i64 (and store) @@ -5374,10 +5381,12 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {4, MVT::v4i32, 6}, // interleave 4 x 4i32 into 16i32 (and store) {4, MVT::v8i32, 16}, // interleave 4 x 8i32 into 32i32 (and store) {4, MVT::v16i32, 32}, // interleave 4 x 16i32 into 64i32 (and store) + {4, MVT::v32i32, 64}, // interleave 4 x 32i32 into 128i32 (and store) {4, MVT::v2i64, 6}, // interleave 4 x 2i64 into 8i64 (and store) {4, MVT::v4i64, 8}, // interleave 4 x 4i64 into 16i64 (and store) {4, MVT::v8i64, 20}, // 
interleave 4 x 8i64 into 32i64 (and store) + {4, MVT::v16i64, 40}, // interleave 4 x 16i64 into 64i64 (and store) {6, MVT::v2i8, 7}, // interleave 6 x 2i8 into 12i8 (and store) {6, MVT::v4i8, 9}, // interleave 6 x 4i8 into 24i8 (and store) @@ -5389,6 +5398,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( {6, MVT::v4i16, 15}, // interleave 6 x 4i16 into 24i16 (and store) {6, MVT::v8i16, 21}, // interleave 6 x 8i16 into 48i16 (and store) {6, MVT::v16i16, 58}, // interleave 6 x 16i16 into 96i16 (and store) + {6, MVT::v32i16, 90}, // interleave 6 x 32i16 into 192i16 (and store) {6, MVT::v2i32, 9}, // interleave 6 x 2i32 into 12i32 (and store) {6, MVT::v4i32, 12}, // interleave 6 x 4i32 into 24i32 (and store) diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll index 80a9215b87cca1..6079f87b8e0dec 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load float, float* %in0, align 4 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load float, float* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load float, float* %in0, align 4 -; AVX2: LV: Found an estimated cost of 228 for VF 32 For instruction: %v0 = load float, float* %in0, align 4 +; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load float, float* %in0, align 4 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load float, float* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll index 4970d17942258c..5fe0776d699a43 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load float, float* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load float, float* %in0, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load float, float* %in0, align 4 -; AVX2: LV: Found an estimated cost of 304 for VF 32 For instruction: %v0 = load float, float* %in0, align 4 +; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load float, float* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load float, float* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll index 67629c58d42c9a..160ada094daa86 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, double* %in0, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, double* %in0, align 8 ; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load double, double* %in0, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load double, double* %in0, align 8 +; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load double, 
double* %in0, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll index 31b7c415a9a327..5b7fe79b7cfed0 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll @@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load double, double* %in0, align 8 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load double, double* %in0, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load double, double* %in0, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: %v0 = load double, double* %in0, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll index 7a9a052e87866a..64dd8b185ecc20 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i16, i16* %in0, align 2 ; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i16, i16* 
%in0, align 2 ; AVX2: LV: Found an estimated cost of 112 for VF 16 For instruction: %v0 = load i16, i16* %in0, align 2 -; AVX2: LV: Found an estimated cost of 516 for VF 32 For instruction: %v0 = load i16, i16* %in0, align 2 +; AVX2: LV: Found an estimated cost of 224 for VF 32 For instruction: %v0 = load i16, i16* %in0, align 2 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 ; AVX512: LV: Found an estimated cost of 13 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll index caa98c9160b6a3..1462f2d28388c4 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX2: LV: Found an estimated cost of 188 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll index 2eab36508045ee..f92b236dd164a8 100644 --- 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX2: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll index d827effecc3835..4cd9372ca072ed 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX2: LV: Found an estimated cost of 276 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX512: LV: Found an 
estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll index 78c209fa106145..d84f9f30da418d 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX2: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll index 2a3cabfdf97adc..b32ff5b10fea8c 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i32, 
i32* %in0, align 4 -; AVX2: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll index d1660b54645df2..9965ae82249b17 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX2: LV: Found an estimated cost of 104 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll index c1e591bed1ce2d..ececeef5330ee5 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll @@ -30,7 +30,7 @@ target triple = 
"x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX2: LV: Found an estimated cost of 368 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll index 84e208971455c4..6d24cd248677ba 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, i64* %in0, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, i64* %in0, align 8 ; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i64, i64* %in0, align 8 -; AVX2: LV: Found an estimated cost of 208 for VF 32 For instruction: %v0 = load i64, i64* %in0, align 8 +; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load i64, i64* %in0, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll index 9f6e3807d2d558..fc57e813101612 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll @@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i64, i64* %in0, align 8 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load i64, i64* %in0, align 8 -; AVX2: LV: Found an estimated cost of 208 for VF 16 For instruction: %v0 = load i64, i64* %in0, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: %v0 = load i64, i64* %in0, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll index 875639f212593a..9931665f5179bd 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store float %v2, float* %out2, align 4 ; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store float %v2, float* %out2, align 4 ; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: store float %v2, float* %out2, align 4 -; AVX2: LV: Found an estimated cost of 228 for VF 32 For instruction: store float %v2, float* %out2, align 4 +; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store float %v2, float* %out2, align 4 ; ; AVX512: 
LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, float* %out2, align 4 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store float %v2, float* %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll index d77ab5609549d5..09312c05f87150 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store float %v3, float* %out3, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: store float %v3, float* %out3, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: store float %v3, float* %out3, align 4 -; AVX2: LV: Found an estimated cost of 304 for VF 32 For instruction: store float %v3, float* %out3, align 4 +; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction: store float %v3, float* %out3, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, float* %out3, align 4 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store float %v3, float* %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll index 186cd4a5186eca..de6bfc43c3d797 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: store double %v1, double* %out1, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v1, double* %out1, align 8 ; AVX2: LV: Found 
an estimated cost of 24 for VF 16 For instruction: store double %v1, double* %out1, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store double %v1, double* %out1, align 8 +; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v1, double* %out1, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, double* %out1, align 8 ; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store double %v1, double* %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll index 0e9773e0d08c45..fdea21c31b2dce 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll @@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store double %v3, double* %out3, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store double %v3, double* %out3, align 8 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v3, double* %out3, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 16 For instruction: store double %v3, double* %out3, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v3, double* %out3, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, double* %out3, align 8 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store double %v3, double* %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll index b4693d1c3916a6..05ace8cf22333b 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %v5, i16* %out5, align 2 ; AVX2: LV: Found an estimated cost of 24 for VF 8 For instruction: store i16 %v5, i16* %out5, align 2 ; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %v5, i16* %out5, align 2 -; AVX2: LV: Found an estimated cost of 516 for VF 32 For instruction: store i16 %v5, i16* %out5, align 2 +; AVX2: LV: Found an estimated cost of 102 for VF 32 For instruction: store i16 %v5, i16* %out5, align 2 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, i16* %out5, align 2 ; AVX512: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, i16* %out5, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll index 80b6531f9d38df..844cdc627d608b 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store i32 %v2, i32* %out2, align 4 ; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store i32 %v2, i32* %out2, align 4 ; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: store i32 %v2, i32* %out2, align 4 -; AVX2: LV: Found an estimated cost of 276 for VF 32 For instruction: store i32 %v2, i32* %out2, align 4 +; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store i32 %v2, i32* %out2, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, i32* %out2, align 4 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store i32 %v2, i32* %out2, align 4 diff 
--git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll index a5e2645bf1e972..1dd952d18082ae 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %v3, i32* %out3, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: store i32 %v3, i32* %out3, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: store i32 %v3, i32* %out3, align 4 -; AVX2: LV: Found an estimated cost of 368 for VF 32 For instruction: store i32 %v3, i32* %out3, align 4 +; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction: store i32 %v3, i32* %out3, align 4 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, i32* %out3, align 4 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %v3, i32* %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll index 06d1c6e0c8629a..0f58fe6c969227 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll @@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: store i64 %v1, i64* %out1, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v1, i64* %out1, align 8 ; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v1, i64* %out1, align 8 -; AVX2: LV: Found an estimated cost of 208 for VF 32 For instruction: store i64 %v1, i64* %out1, align 8 +; AVX2: LV: Found an estimated cost of 48 for VF 32 For 
instruction: store i64 %v1, i64* %out1, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, i64* %out1, align 8 ; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %v1, i64* %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll index c48b1d7aa41f3c..92da4c8515471a 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll @@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu" ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %v3, i64* %out3, align 8 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store i64 %v3, i64* %out3, align 8 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %v3, i64* %out3, align 8 -; AVX2: LV: Found an estimated cost of 208 for VF 16 For instruction: store i64 %v3, i64* %out3, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %v3, i64* %out3, align 8 ; ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, i64* %out3, align 8 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %v3, i64* %out3, align 8 diff --git a/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll b/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll new file mode 100644 index 00000000000000..8c688e6266924a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll @@ -0,0 +1,1071 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; FADD +; + +define @fadd_h_immhalf( %a) #0 { +; CHECK-LABEL: fadd_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; 
CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_h_immone( %a) #0 { +; CHECK-LABEL: fadd_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_4h_immhalf( %a) #0 { +; CHECK-LABEL: fadd_4h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_4h_immone( %a) #0 { +; CHECK-LABEL: fadd_4h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_2h_immhalf( %a) #0 { +; CHECK-LABEL: fadd_2h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_2h_immone( %a) #0 { +; CHECK-LABEL: fadd_2h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_s_immhalf( %a) #0 { +; CHECK-LABEL: fadd_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 
0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_s_immone( %a) #0 { +; CHECK-LABEL: fadd_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_2s_immhalf( %a) #0 { +; CHECK-LABEL: fadd_2s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_2s_immone( %a) #0 { +; CHECK-LABEL: fadd_2s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + + +define @fadd_d_immhalf( %a) #0 { +; CHECK-LABEL: fadd_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_d_immone( %a) #0 { +; CHECK-LABEL: fadd_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +; +; FMAX +; + +define @fmax_h_immzero( %a) #0 { +; CHECK-LABEL: fmax_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = 
shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv8f16( %a, %splat) + ret %out +} + +define @fmax_h_immone( %a) #0 { +; CHECK-LABEL: fmax_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv8f16( %a, %splat) + ret %out +} + +define @fmax_4h_immzero( %a) #0 { +; CHECK-LABEL: fmax_4h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv4f16( %a, %splat) + ret %out +} + +define @fmax_4h_immone( %a) #0 { +; CHECK-LABEL: fmax_4h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv4f16( %a, %splat) + ret %out +} + +define @fmax_2h_immzero( %a) #0 { +; CHECK-LABEL: fmax_2h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f16( %a, %splat) + ret %out +} + +define @fmax_2h_immone( %a) #0 { +; CHECK-LABEL: fmax_2h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f16( %a, %splat) + ret %out +} + +define @fmax_s_immzero( %a) #0 { +; CHECK-LABEL: fmax_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, 
#0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv4f32( %a, %splat) + ret %out +} + +define @fmax_s_immone( %a) #0 { +; CHECK-LABEL: fmax_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv4f32( %a, %splat) + ret %out +} + +define @fmax_2s_immzero( %a) #0 { +; CHECK-LABEL: fmax_2s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f32( %a, %splat) + ret %out +} + +define @fmax_2s_immone( %a) #0 { +; CHECK-LABEL: fmax_2s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f32( %a, %splat) + ret %out +} + +define @fmax_d_immzero( %a) #0 { +; CHECK-LABEL: fmax_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f64( %a, %splat) + ret %out +} + +define @fmax_d_immone( %a) #0 { +; CHECK-LABEL: fmax_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f64( %a, %splat) + ret %out +} + +; +; FMAXNM +; + +define @fmaxnm_h_immzero( %a) #0 { 
+; CHECK-LABEL: fmaxnm_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv8f16( %a, %splat) + ret %out +} + +define @fmaxnm_h_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv8f16( %a, %splat) + ret %out +} + +define @fmaxnm_4h_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_4h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv4f16( %a, %splat) + ret %out +} + +define @fmaxnm_4h_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_4h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv4f16( %a, %splat) + ret %out +} + +define @fmaxnm_2h_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_2h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f16( %a, %splat) + ret %out +} + +define @fmaxnm_2h_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_2h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, 
undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f16( %a, %splat) + ret %out +} + +define @fmaxnm_s_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv4f32( %a, %splat) + ret %out +} + +define @fmaxnm_s_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv4f32( %a, %splat) + ret %out +} + +define @fmaxnm_2s_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_2s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f32( %a, %splat) + ret %out +} + +define @fmaxnm_2s_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_2s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f32( %a, %splat) + ret %out +} + +define @fmaxnm_d_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f64( %a, %splat) + ret %out +} + +define @fmaxnm_d_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmaxnm z0.d, 
p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f64( %a, %splat) + ret %out +} + +; +; FMIN +; + +define @fmin_h_immzero( %a) #0 { +; CHECK-LABEL: fmin_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv8f16( %a, %splat) + ret %out +} + +define @fmin_h_immone( %a) #0 { +; CHECK-LABEL: fmin_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv8f16( %a, %splat) + ret %out +} + +define @fmin_4h_immzero( %a) #0 { +; CHECK-LABEL: fmin_4h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv4f16( %a, %splat) + ret %out +} + +define @fmin_4h_immone( %a) #0 { +; CHECK-LABEL: fmin_4h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv4f16( %a, %splat) + ret %out +} + +define @fmin_2h_immzero( %a) #0 { +; CHECK-LABEL: fmin_2h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv2f16( %a, %splat) + ret %out +} + +define @fmin_2h_immone( %a) 
#0 { +; CHECK-LABEL: fmin_2h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv2f16( %a, %splat) + ret %out +} + +define @fmin_s_immzero( %a) #0 { +; CHECK-LABEL: fmin_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv4f32( %a, %splat) + ret %out +} + +define @fmin_s_immone( %a) #0 { +; CHECK-LABEL: fmin_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv4f32( %a, %splat) + ret %out +} + +define @fmin_2s_immzero( %a) #0 { +; CHECK-LABEL: fmin_2s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv2f32( %a, %splat) + ret %out +} + +define @fmin_2s_immone( %a) #0 { +; CHECK-LABEL: fmin_2s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv2f32( %a, %splat) + ret %out +} + +define @fmin_d_immzero( %a) #0 { +; CHECK-LABEL: fmin_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer 
+ %out = call @llvm.minimum.nxv2f64( %a, %splat) + ret %out +} + +define @fmin_d_immone( %a) #0 { +; CHECK-LABEL: fmin_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv2f64( %a, %splat) + ret %out +} + +; +; FMINNM +; + +define @fminnm_h_immzero( %a) #0 { +; CHECK-LABEL: fminnm_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv8f16( %a, %splat) + ret %out +} + +define @fminnm_h_immone( %a) #0 { +; CHECK-LABEL: fminnm_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv8f16( %a, %splat) + ret %out +} + +define @fminnm_4h_immzero( %a) #0 { +; CHECK-LABEL: fminnm_4h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv4f16( %a, %splat) + ret %out +} + +define @fminnm_4h_immone( %a) #0 { +; CHECK-LABEL: fminnm_4h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv4f16( %a, %splat) + ret %out +} + +define @fminnm_2h_immzero( %a) #0 { +; CHECK-LABEL: fminnm_2h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.h, p0/m, 
z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f16( %a, %splat) + ret %out +} + +define @fminnm_2h_immone( %a) #0 { +; CHECK-LABEL: fminnm_2h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f16( %a, %splat) + ret %out +} + +define @fminnm_s_immzero( %a) #0 { +; CHECK-LABEL: fminnm_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv4f32( %a, %splat) + ret %out +} + +define @fminnm_s_immone( %a) #0 { +; CHECK-LABEL: fminnm_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv4f32( %a, %splat) + ret %out +} + +define @fminnm_2s_immzero( %a) #0 { +; CHECK-LABEL: fminnm_2s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f32( %a, %splat) + ret %out +} + +define @fminnm_2s_immone( %a) #0 { +; CHECK-LABEL: fminnm_2s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f32( %a, %splat) + ret %out +} + +define @fminnm_d_immzero( 
%a) #0 { +; CHECK-LABEL: fminnm_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f64( %a, %splat) + ret %out +} + +define @fminnm_d_immone( %a) #0 { +; CHECK-LABEL: fminnm_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f64( %a, %splat) + ret %out +} + +; +; FMUL +; + +define @fmul_h_immhalf( %a) #0 { +; CHECK-LABEL: fmul_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_h_immtwo( %a) #0 { +; CHECK-LABEL: fmul_h_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, z0.h, z0.h +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_4h_immhalf( %a) #0 { +; CHECK-LABEL: fmul_4h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_4h_immtwo( %a) #0 { +; CHECK-LABEL: fmul_4h_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z0.h +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_2h_immhalf( %a) #0 { +; 
CHECK-LABEL: fmul_2h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_2h_immtwo( %a) #0 { +; CHECK-LABEL: fmul_2h_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z0.h +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_s_immhalf( %a) #0 { +; CHECK-LABEL: fmul_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_s_immtwo( %a) #0 { +; CHECK-LABEL: fmul_s_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, z0.s, z0.s +; CHECK-NEXT: ret + %elt = insertelement undef, float 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_2s_immhalf( %a) #0 { +; CHECK-LABEL: fmul_2s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_2s_immtwo( %a) #0 { +; CHECK-LABEL: fmul_2s_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z0.s +; CHECK-NEXT: ret + %elt = insertelement undef, float 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_d_immhalf( %a) #0 { +; CHECK-LABEL: fmul_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; 
CHECK-NEXT: fmul z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_d_immtwo( %a) #0 { +; CHECK-LABEL: fmul_d_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, z0.d, z0.d +; CHECK-NEXT: ret + %elt = insertelement undef, double 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +; +; FSUB +; + +define @fsub_h_immhalf( %a) #0 { +; CHECK-LABEL: fsub_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_h_immone( %a) #0 { +; CHECK-LABEL: fsub_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_4h_immhalf( %a) #0 { +; CHECK-LABEL: fsub_4h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_4h_immone( %a) #0 { +; CHECK-LABEL: fsub_4h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_2h_immhalf( %a) #0 { +; CHECK-LABEL: fsub_2h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = 
insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_2h_immone( %a) #0 { +; CHECK-LABEL: fsub_2h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_s_immhalf( %a) #0 { +; CHECK-LABEL: fsub_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_s_immone( %a) #0 { +; CHECK-LABEL: fsub_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_2s_immhalf( %a) #0 { +; CHECK-LABEL: fsub_2s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_2s_immone( %a) #0 { +; CHECK-LABEL: fsub_2s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_d_immhalf( %a) #0 { +; CHECK-LABEL: fsub_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + 
%splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_d_immone( %a) #0 { +; CHECK-LABEL: fsub_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +;; Arithmetic intrinsic declarations + +declare @llvm.maximum.nxv8f16(, ) +declare @llvm.maximum.nxv4f16(, ) +declare @llvm.maximum.nxv2f16(, ) +declare @llvm.maximum.nxv4f32(, ) +declare @llvm.maximum.nxv2f32(, ) +declare @llvm.maximum.nxv2f64(, ) + +declare @llvm.maxnum.nxv8f16(, ) +declare @llvm.maxnum.nxv4f16(, ) +declare @llvm.maxnum.nxv2f16(, ) +declare @llvm.maxnum.nxv4f32(, ) +declare @llvm.maxnum.nxv2f32(, ) +declare @llvm.maxnum.nxv2f64(, ) + +declare @llvm.minimum.nxv8f16(, ) +declare @llvm.minimum.nxv4f16(, ) +declare @llvm.minimum.nxv2f16(, ) +declare @llvm.minimum.nxv4f32(, ) +declare @llvm.minimum.nxv2f32(, ) +declare @llvm.minimum.nxv2f64(, ) + +declare @llvm.minnum.nxv8f16(, ) +declare @llvm.minnum.nxv4f16(, ) +declare @llvm.minnum.nxv2f16(, ) +declare @llvm.minnum.nxv4f32(, ) +declare @llvm.minnum.nxv2f32(, ) +declare @llvm.minnum.nxv2f64(, ) + +attributes #0 = { "target-features"="+sve" } +attributes #1 = { "target-features"="+sve,+use-experimental-zeroing-pseudos" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-imm.ll new file mode 100644 index 00000000000000..eea6031fbd6b53 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-imm.ll @@ -0,0 +1,1309 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define @fadd_h_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fadd_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; 
CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a, + %splat) + ret %out +} + + +define @fadd_h_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_h_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_h_immone( %pg, %a) #0 { +; CHECK-LABEL: fadd_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fadd_h_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_h_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_s_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fadd_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fadd_s_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_s_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement 
undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_s_immone( %pg, %a) #0 { +; CHECK-LABEL: fadd_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fadd_s_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_s_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_d_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fadd_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fadd_d_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_d_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_d_immone( %pg, %a) #0 { +; CHECK-LABEL: fadd_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = 
shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fadd_d_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_d_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_h_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmax_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmax_h_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_h_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_h_immone( %pg, %a) #0 { +; CHECK-LABEL: fmax_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmax_h_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_h_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer 
+ %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_s_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmax_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmax_s_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_s_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_s_immone( %pg, %a) #0 { +; CHECK-LABEL: fmax_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmax_s_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_s_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_d_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmax_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call 
@llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmax_d_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_d_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_d_immone( %pg, %a) #0 { +; CHECK-LABEL: fmax_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmax_d_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_d_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_h_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_h_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_h_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, 
zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_h_immone( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_h_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_h_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_s_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_s_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_s_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_s_immone( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call 
@llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_s_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_s_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_d_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_d_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_d_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_d_immone( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_d_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_d_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = 
select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_h_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmin_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmin_h_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_h_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_h_immone( %pg, %a) #0 { +; CHECK-LABEL: fmin_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmin_h_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_h_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_s_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmin_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + 
%a, + %splat) + ret %out +} + +define @fmin_s_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_s_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_s_immone( %pg, %a) #0 { +; CHECK-LABEL: fmin_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmin_s_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_s_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_d_immzero( %pg, %a) #0 { +; CHECK-LABEL: fmin_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmin_d_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_d_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( 
%pg, + %a_z, + %splat) + ret %out +} + +define @fmin_d_immone( %pg, %a) #0 { +; CHECK-LABEL: fmin_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmin_d_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_d_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_h_immzero( %pg, %a) #0 { +; CHECK-LABEL: fminnm_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_h_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_h_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_h_immone( %pg, %a) #0 { +; CHECK-LABEL: fminnm_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define 
@fminnm_h_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_h_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_s_immzero( %pg, %a) #0 { +; CHECK-LABEL: fminnm_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_s_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_s_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_s_immone( %pg, %a) #0 { +; CHECK-LABEL: fminnm_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_s_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_s_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, 
+ %a_z, + %splat) + ret %out +} + +define @fminnm_d_immzero( %pg, %a) #0 { +; CHECK-LABEL: fminnm_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_d_immzero_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_d_immzero_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_d_immone( %pg, %a) #0 { +; CHECK-LABEL: fminnm_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_d_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_d_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_h_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fmul_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define 
@fmul_h_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_h_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_h_immtwo( %pg, %a) #0 { +; CHECK-LABEL: fmul_h_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmul_h_immtwo_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_h_immtwo_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_s_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fmul_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmul_s_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_s_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + 
+define @fmul_s_immtwo( %pg, %a) #0 { +; CHECK-LABEL: fmul_s_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmul_s_immtwo_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_s_immtwo_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_d_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fmul_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmul_d_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_d_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_d_immtwo( %pg, %a) #0 { +; CHECK-LABEL: fmul_d_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmul_d_immtwo_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_d_immtwo_zero: +; 
CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_h_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fsub_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fsub_h_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_h_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_h_immone( %pg, %a) #0 { +; CHECK-LABEL: fsub_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fsub_h_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_h_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_s_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fsub_s_immhalf: +; CHECK: 
// %bb.0: +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fsub_s_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_s_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_s_immone( %pg, %a) #0 { +; CHECK-LABEL: fsub_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fsub_s_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_s_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_d_immhalf( %pg, %a) #0 { +; CHECK-LABEL: fsub_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fsub_d_immhalf_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_d_immhalf_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsub z0.d, 
p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_d_immone( %pg, %a) #0 { +; CHECK-LABEL: fsub_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fsub_d_immone_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_d_immone_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_h_immhalf( %pg, %a) #1 { +; CHECK-LABEL: fsubr_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_h_immone( %pg, %a) #1 { +; CHECK-LABEL: fsubr_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_s_immhalf( %pg, %a) #1 { +; CHECK-LABEL: fsubr_s_immhalf: +; 
CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_s_immone( %pg, %a) #1 { +; CHECK-LABEL: fsubr_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_d_immhalf( %pg, %a) #1 { +; CHECK-LABEL: fsubr_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_d_immone( %pg, %a) #1 { +; CHECK-LABEL: fsubr_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + + +;; Arithmetic intrinsic declarations + +declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) +declare @llvm.aarch64.sve.fadd.nxv4f32(, , ) +declare @llvm.aarch64.sve.fadd.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmax.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmax.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmax.nxv2f64(, , ) + +declare 
@llvm.aarch64.sve.fmaxnm.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmaxnm.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmaxnm.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmin.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmin.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmin.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fminnm.nxv8f16(, , ) +declare @llvm.aarch64.sve.fminnm.nxv4f32(, , ) +declare @llvm.aarch64.sve.fminnm.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) +declare @llvm.aarch64.sve.fmul.nxv4f32(, , ) +declare @llvm.aarch64.sve.fmul.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fsub.nxv8f16(, , ) +declare @llvm.aarch64.sve.fsub.nxv4f32(, , ) +declare @llvm.aarch64.sve.fsub.nxv2f64(, , ) + +declare @llvm.aarch64.sve.fsubr.nxv8f16(, , ) +declare @llvm.aarch64.sve.fsubr.nxv4f32(, , ) +declare @llvm.aarch64.sve.fsubr.nxv2f64(, , ) + +attributes #0 = { "target-features"="+sve" } +attributes #1 = { "target-features"="+sve,+use-experimental-zeroing-pseudos" } diff --git a/llvm/test/Transforms/InstCombine/truncating-saturate.ll b/llvm/test/Transforms/InstCombine/truncating-saturate.ll new file mode 100644 index 00000000000000..07899b9490cf11 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/truncating-saturate.ll @@ -0,0 +1,585 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +declare void @use(i32) +declare void @use1(i1) + +define i8 @testi16i8(i16 %add) { +; CHECK-LABEL: @testi16i8( +; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 +; 
CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i16 %add, 8 + %conv.i = trunc i16 %sh to i8 + %conv1.i = trunc i16 %add to i8 + %shr2.i = ashr i8 %conv1.i, 7 + %cmp.not.i = icmp eq i8 %shr2.i, %conv.i + %shr4.i = ashr i16 %add, 15 + %conv5.i = trunc i16 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 127 + %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i + ret i8 %cond.i +} + +define i32 @testi64i32(i64 %add) { +; CHECK-LABEL: @testi64i32( +; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: ret i32 [[COND_I]] +; + %sh = lshr i64 %add, 32 + %conv.i = trunc i64 %sh to i32 + %conv1.i = trunc i64 %add to i32 + %shr2.i = ashr i32 %conv1.i, 31 + %cmp.not.i = icmp eq i32 %shr2.i, %conv.i + %shr4.i = ashr i64 %add, 63 + %conv5.i = trunc i64 %shr4.i to i32 + %xor.i = xor i32 %conv5.i, 2147483647 + %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i + ret i32 %cond.i +} + +define i16 @testi32i16i8(i32 %add) { +; CHECK-LABEL: @testi32i16i8( +; CHECK-NEXT: [[A:%.*]] = add i32 [[ADD:%.*]], 128 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A]], 256 +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[ADD]] to i16 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[ADD]], -1 +; CHECK-NEXT: [[F:%.*]] = select i1 [[C]], i16 127, i16 -128 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i16 [[T]], 
i16 [[F]] +; CHECK-NEXT: ret i16 [[R]] +; + %a = add i32 %add, 128 + %cmp = icmp ult i32 %a, 256 + %t = trunc i32 %add to i16 + %c = icmp sgt i32 %add, -1 + %f = select i1 %c, i16 127, i16 -128 + %r = select i1 %cmp, i16 %t, i16 %f + ret i16 %r +} + +define <4 x i16> @testv4i32i16i8(<4 x i32> %add) { +; CHECK-LABEL: @testv4i32i16i8( +; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[ADD:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[A]], +; CHECK-NEXT: [[T:%.*]] = trunc <4 x i32> [[ADD]] to <4 x i16> +; CHECK-NEXT: [[C:%.*]] = icmp sgt <4 x i32> [[ADD]], +; CHECK-NEXT: [[F:%.*]] = select <4 x i1> [[C]], <4 x i16> , <4 x i16> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CMP]], <4 x i16> [[T]], <4 x i16> [[F]] +; CHECK-NEXT: ret <4 x i16> [[R]] +; + %a = add <4 x i32> %add, + %cmp = icmp ult <4 x i32> %a, + %t = trunc <4 x i32> %add to <4 x i16> + %c = icmp sgt <4 x i32> %add, + %f = select <4 x i1> %c, <4 x i16> , <4 x i16> + %r = select <4 x i1> %cmp, <4 x i16> %t, <4 x i16> %f + ret <4 x i16> %r +} + +define i32 @testi32i32i8(i32 %add) { +; CHECK-LABEL: @testi32i32i8( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127 +; CHECK-NEXT: ret i32 [[R]] +; + %a = add i32 %add, 128 + %cmp = icmp ult i32 %a, 256 + %c = icmp sgt i32 %add, -1 + %f = select i1 %c, i32 127, i32 -128 + %r = select i1 %cmp, i32 %add, i32 %f + ret i32 %r +} + +define i16 @test_truncfirst(i32 %add) { +; CHECK-LABEL: @test_truncfirst( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[ADD:%.*]] to i16 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[T]], -128 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[T]], i16 -128 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127 +; CHECK-NEXT: ret i16 [[R]] +; + %t = trunc i32 %add to 
i16 + %a = add i16 %t, 128 + %cmp = icmp ult i16 %a, 256 + %c = icmp sgt i16 %t, -1 + %f = select i1 %c, i16 127, i16 -128 + %r = select i1 %cmp, i16 %t, i16 %f + ret i16 %r +} + +define i16 @testtrunclowhigh(i32 %add, i16 %low, i16 %high) { +; CHECK-LABEL: @testtrunclowhigh( +; CHECK-NEXT: [[A:%.*]] = add i32 [[ADD:%.*]], 128 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A]], 256 +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[ADD]] to i16 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[ADD]], -1 +; CHECK-NEXT: [[F:%.*]] = select i1 [[C]], i16 [[HIGH:%.*]], i16 [[LOW:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i16 [[T]], i16 [[F]] +; CHECK-NEXT: ret i16 [[R]] +; + %a = add i32 %add, 128 + %cmp = icmp ult i32 %a, 256 + %t = trunc i32 %add to i16 + %c = icmp sgt i32 %add, -1 + %f = select i1 %c, i16 %high, i16 %low + %r = select i1 %cmp, i16 %t, i16 %f + ret i16 %r +} + +define i32 @testi64i32addsat(i32 %a, i32 %b) { +; CHECK-LABEL: @testi64i32addsat( +; CHECK-NEXT: [[SA:%.*]] = sext i32 [[A:%.*]] to i64 +; CHECK-NEXT: [[SB:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[SA]], [[SB]] +; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD]], 32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: ret i32 [[COND_I]] +; + %sa = sext i32 %a to i64 + %sb = sext i32 %b to i64 + %add = add i64 %sa, %sb + %sh = lshr i64 %add, 32 + %conv.i = trunc i64 %sh to i32 + %conv1.i = trunc i64 %add to i32 + %shr2.i = ashr i32 %conv1.i, 31 + %cmp.not.i = icmp eq i32 %shr2.i, %conv.i + %shr4.i = ashr i64 %add, 63 + 
%conv5.i = trunc i64 %shr4.i to i32 + %xor.i = xor i32 %conv5.i, 2147483647 + %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i + ret i32 %cond.i +} + +define <4 x i8> @testv4i16i8(<4 x i16> %add) { +; CHECK-LABEL: @testv4i16i8( +; CHECK-NEXT: [[SH:%.*]] = lshr <4 x i16> [[ADD:%.*]], +; CHECK-NEXT: [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8> +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8> +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8> +; CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], +; CHECK-NEXT: [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]] +; CHECK-NEXT: ret <4 x i8> [[COND_I]] +; + %sh = lshr <4 x i16> %add, + %conv.i = trunc <4 x i16> %sh to <4 x i8> + %conv1.i = trunc <4 x i16> %add to <4 x i8> + %shr2.i = ashr <4 x i8> %conv1.i, + %cmp.not.i = icmp eq <4 x i8> %shr2.i, %conv.i + %shr4.i = ashr <4 x i16> %add, + %conv5.i = trunc <4 x i16> %shr4.i to <4 x i8> + %xor.i = xor <4 x i8> %conv5.i, + %cond.i = select <4 x i1> %cmp.not.i, <4 x i8> %conv1.i, <4 x i8> %xor.i + ret <4 x i8> %cond.i +} + +define <4 x i8> @testv4i16i8add(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: @testv4i16i8add( +; CHECK-NEXT: [[SA:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i16> +; CHECK-NEXT: [[SB:%.*]] = sext <4 x i8> [[B:%.*]] to <4 x i16> +; CHECK-NEXT: [[ADD:%.*]] = add nsw <4 x i16> [[SA]], [[SB]] +; CHECK-NEXT: [[SH:%.*]] = lshr <4 x i16> [[ADD]], +; CHECK-NEXT: [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8> +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8> +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], +; CHECK-NEXT: 
[[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8> +; CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], +; CHECK-NEXT: [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]] +; CHECK-NEXT: ret <4 x i8> [[COND_I]] +; + %sa = sext <4 x i8> %a to <4 x i16> + %sb = sext <4 x i8> %b to <4 x i16> + %add = add <4 x i16> %sa, %sb + %sh = lshr <4 x i16> %add, + %conv.i = trunc <4 x i16> %sh to <4 x i8> + %conv1.i = trunc <4 x i16> %add to <4 x i8> + %shr2.i = ashr <4 x i8> %conv1.i, + %cmp.not.i = icmp eq <4 x i8> %shr2.i, %conv.i + %shr4.i = ashr <4 x i16> %add, + %conv5.i = trunc <4 x i16> %shr4.i to <4 x i8> + %xor.i = xor <4 x i8> %conv5.i, + %cond.i = select <4 x i1> %cmp.not.i, <4 x i8> %conv1.i, <4 x i8> %xor.i + ret <4 x i8> %cond.i +} + +define i8 @testi16i8_revcmp(i16 %add) { +; CHECK-LABEL: @testi16i8_revcmp( +; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i16 %add, 8 + %conv.i = trunc i16 %sh to i8 + %conv1.i = trunc i16 %add to i8 + %shr2.i = ashr i8 %conv1.i, 7 + %cmp.not.i = icmp eq i8 %conv.i, %shr2.i + %shr4.i = ashr i16 %add, 15 + %conv5.i = trunc i16 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 127 + %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i + ret i8 %cond.i +} + +define i8 @testi16i8_revselect(i16 %add) { +; CHECK-LABEL: @testi16i8_revselect( +; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: 
[[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 +; CHECK-NEXT: [[CMP_NOT_I_NOT:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I_NOT]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i16 %add, 8 + %conv.i = trunc i16 %sh to i8 + %conv1.i = trunc i16 %add to i8 + %shr2.i = ashr i8 %conv1.i, 7 + %cmp.not.i = icmp ne i8 %conv.i, %shr2.i + %shr4.i = ashr i16 %add, 15 + %conv5.i = trunc i16 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 127 + %cond.i = select i1 %cmp.not.i, i8 %xor.i, i8 %conv1.i + ret i8 %cond.i +} + +define i8 @testi32i8(i32 %add) { +; CHECK-LABEL: @testi32i8( +; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SH]] to i8 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i32 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ADD]], 15 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i32 %add, 8 + %conv.i = trunc i32 %sh to i8 + %conv1.i = trunc i32 %add to i8 + %shr2.i = ashr i8 %conv1.i, 7 + %cmp.not.i = icmp eq i8 %shr2.i, %conv.i + %shr4.i = ashr i32 %add, 15 + %conv5.i = trunc i32 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 127 + %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i + ret i8 %cond.i +} + +define i16 @differentconsts(i32 %x, i16 %replacement_low, i16 %replacement_high) { +; CHECK-LABEL: @differentconsts( +; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 128 +; CHECK-NEXT: [[T1:%.*]] = select i1 
[[T0]], i16 256, i16 -1 +; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16 +; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144 +; CHECK-NEXT: [[T4:%.*]] = trunc i32 [[X]] to i16 +; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i16 [[T4]], i16 [[T1]] +; CHECK-NEXT: ret i16 [[R]] +; + %t0 = icmp slt i32 %x, 128 + %t1 = select i1 %t0, i16 256, i16 65535 + %t2 = add i32 %x, 16 + %t3 = icmp ult i32 %t2, 144 + %t4 = trunc i32 %x to i16 + %r = select i1 %t3, i16 %t4, i16 %t1 + ret i16 %r +} + +define i8 @badimm1(i16 %add) { +; CHECK-LABEL: @badimm1( +; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 9 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i16 %add, 9 + %conv.i = trunc i16 %sh to i8 + %conv1.i = trunc i16 %add to i8 + %shr2.i = ashr i8 %conv1.i, 7 + %cmp.not.i = icmp eq i8 %shr2.i, %conv.i + %shr4.i = ashr i16 %add, 15 + %conv5.i = trunc i16 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 127 + %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i + ret i8 %cond.i +} + +define i8 @badimm2(i16 %add) { +; CHECK-LABEL: @badimm2( +; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 6 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 +; CHECK-NEXT: 
[[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i16 %add, 8 + %conv.i = trunc i16 %sh to i8 + %conv1.i = trunc i16 %add to i8 + %shr2.i = ashr i8 %conv1.i, 6 + %cmp.not.i = icmp eq i8 %shr2.i, %conv.i + %shr4.i = ashr i16 %add, 15 + %conv5.i = trunc i16 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 127 + %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i + ret i8 %cond.i +} + +define i8 @badimm3(i16 %add) { +; CHECK-LABEL: @badimm3( +; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 14 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i16 %add, 8 + %conv.i = trunc i16 %sh to i8 + %conv1.i = trunc i16 %add to i8 + %shr2.i = ashr i8 %conv1.i, 7 + %cmp.not.i = icmp eq i8 %shr2.i, %conv.i + %shr4.i = ashr i16 %add, 14 + %conv5.i = trunc i16 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 127 + %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i + ret i8 %cond.i +} + +define i8 @badimm4(i16 %add) { +; CHECK-LABEL: @badimm4( +; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 126 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 
[[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] +; CHECK-NEXT: ret i8 [[COND_I]] +; + %sh = lshr i16 %add, 8 + %conv.i = trunc i16 %sh to i8 + %conv1.i = trunc i16 %add to i8 + %shr2.i = ashr i8 %conv1.i, 7 + %cmp.not.i = icmp eq i8 %shr2.i, %conv.i + %shr4.i = ashr i16 %add, 15 + %conv5.i = trunc i16 %shr4.i to i8 + %xor.i = xor i8 %conv5.i, 126 + %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i + ret i8 %cond.i +} + +; One use checks + +define i32 @oneusexor(i64 %add) { +; CHECK-LABEL: @oneusexor( +; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: call void @use(i32 [[XOR_I]]) +; CHECK-NEXT: ret i32 [[COND_I]] +; + %sh = lshr i64 %add, 32 + %conv.i = trunc i64 %sh to i32 + %conv1.i = trunc i64 %add to i32 + %shr2.i = ashr i32 %conv1.i, 31 + %cmp.not.i = icmp eq i32 %shr2.i, %conv.i + %shr4.i = ashr i64 %add, 63 + %conv5.i = trunc i64 %shr4.i to i32 + %xor.i = xor i32 %conv5.i, 2147483647 + %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i + call void @use(i32 %xor.i) + ret i32 %cond.i +} + +define i32 @oneuseconv(i64 %add) { +; CHECK-LABEL: @oneuseconv( +; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 
[[SHR4_I]] to i32 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: call void @use(i32 [[CONV1_I]]) +; CHECK-NEXT: ret i32 [[COND_I]] +; + %sh = lshr i64 %add, 32 + %conv.i = trunc i64 %sh to i32 + %conv1.i = trunc i64 %add to i32 + %shr2.i = ashr i32 %conv1.i, 31 + %cmp.not.i = icmp eq i32 %shr2.i, %conv.i + %shr4.i = ashr i64 %add, 63 + %conv5.i = trunc i64 %shr4.i to i32 + %xor.i = xor i32 %conv5.i, 2147483647 + %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i + call void @use(i32 %conv1.i) + ret i32 %cond.i +} + +define i32 @oneusecmp(i64 %add) { +; CHECK-LABEL: @oneusecmp( +; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: call void @use1(i1 [[CMP_NOT_I]]) +; CHECK-NEXT: ret i32 [[COND_I]] +; + %sh = lshr i64 %add, 32 + %conv.i = trunc i64 %sh to i32 + %conv1.i = trunc i64 %add to i32 + %shr2.i = ashr i32 %conv1.i, 31 + %cmp.not.i = icmp eq i32 %shr2.i, %conv.i + %shr4.i = ashr i64 %add, 63 + %conv5.i = trunc i64 %shr4.i to i32 + %xor.i = xor i32 %conv5.i, 2147483647 + %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i + call void @use1(i1 %cmp.not.i) + ret i32 %cond.i +} + +define i32 @oneuseboth(i64 %add) { +; CHECK-LABEL: @oneuseboth( +; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: 
[[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: call void @use(i32 [[XOR_I]]) +; CHECK-NEXT: call void @use(i32 [[CONV1_I]]) +; CHECK-NEXT: ret i32 [[COND_I]] +; + %sh = lshr i64 %add, 32 + %conv.i = trunc i64 %sh to i32 + %conv1.i = trunc i64 %add to i32 + %shr2.i = ashr i32 %conv1.i, 31 + %cmp.not.i = icmp eq i32 %shr2.i, %conv.i + %shr4.i = ashr i64 %add, 63 + %conv5.i = trunc i64 %shr4.i to i32 + %xor.i = xor i32 %conv5.i, 2147483647 + %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i + call void @use(i32 %xor.i) + call void @use(i32 %conv1.i) + ret i32 %cond.i +} + +define i32 @oneusethree(i64 %add) { +; CHECK-LABEL: @oneusethree( +; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 +; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: call void @use(i32 [[XOR_I]]) +; CHECK-NEXT: call void @use(i32 [[CONV1_I]]) +; CHECK-NEXT: call void @use1(i1 [[CMP_NOT_I]]) +; CHECK-NEXT: ret i32 [[COND_I]] +; + %sh = lshr i64 %add, 32 + %conv.i = trunc i64 %sh to i32 + %conv1.i = trunc i64 %add to i32 + %shr2.i = ashr i32 %conv1.i, 31 + %cmp.not.i = icmp eq i32 %shr2.i, %conv.i + %shr4.i = ashr i64 %add, 63 + %conv5.i = trunc i64 %shr4.i to i32 + %xor.i = xor i32 
%conv5.i, 2147483647 + %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i + call void @use(i32 %xor.i) + call void @use(i32 %conv1.i) + call void @use1(i1 %cmp.not.i) + ret i32 %cond.i +} + +define i8 @C0zero(i8 %X, i8 %y, i8 %z) { +; CHECK-LABEL: @C0zero( +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], -10 +; CHECK-NEXT: [[F:%.*]] = select i1 [[C]], i8 [[Y:%.*]], i8 [[Z:%.*]] +; CHECK-NEXT: ret i8 [[F]] +; + %a = add i8 %X, 10 + %cmp = icmp ult i8 %a, 0 + %c = icmp slt i8 %X, -10 + %f = select i1 %c, i8 %y, i8 %z + %r = select i1 %cmp, i8 %X, i8 %f + ret i8 %r +} diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index bc78869f9c54d9..21533652b11c21 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -1081,6 +1081,20 @@ TEST_F(ConstantRangeTest, Multiply) { ConstantRange(APInt(8, -2), APInt(8, 1))); } +TEST_F(ConstantRangeTest, smul_fast) { + TestBinaryOpExhaustive( + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.smul_fast(CR2); + }, + [](const APInt &N1, const APInt &N2) { + return N1 * N2; + }, + PreferSmallest, + [](const ConstantRange &, const ConstantRange &) { + return false; // Check correctness only. + }); +} + TEST_F(ConstantRangeTest, UMax) { EXPECT_EQ(Full.umax(Full), Full); EXPECT_EQ(Full.umax(Empty), Empty); diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index 795c81dc3f6f66..15a6b92d7c7139 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -110,6 +110,7 @@ static_library("Support") { "PrettyStackTrace.cpp", "RISCVAttributeParser.cpp", "RISCVAttributes.cpp", + "RISCVISAInfo.cpp", "RWMutex.cpp", "RandomNumberGenerator.cpp", "Regex.cpp",