diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 6501a4870e2a6e..27ff9ddc70a34e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -110,6 +110,13 @@ Attribute Changes in Clang
   attribute is handled instead, e.g. in ``handleDeclAttribute``.
   (This was changed in order to better support attributes in code completion).
 
+- ``__has_cpp_attribute``, ``__has_c_attribute``, ``__has_attribute``, and
+  ``__has_declspec_attribute`` will now macro expand their argument. This causes
+  a change in behavior for code using ``__has_cpp_attribute(__clang__::attr)``
+  (and same for ``__has_c_attribute``) where it would previously expand to ``0``
+  for all attributes, but will now issue an error due to the expansion of the
+  predefined ``__clang__`` macro.
+
 Windows Support
 ---------------
 
@@ -122,6 +129,9 @@ Windows Support
 C Language Changes in Clang
 ---------------------------
 
+- The value of ``__STDC_VERSION__`` has been bumped to ``202000L`` when passing
+  ``-std=c2x`` so that it can be distinguished from C17 mode. This value is
+  expected to change again when C23 is published.
 - Wide multi-characters literals such as ``L'ab'`` that would previously be interpreted as ``L'b'``
   are now ill-formed in all language modes. The motivation for this change is outlined in
   `P2362 <wg21.link/P2362>`_.
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index a28348a7af169b..6cb6d0b2d1fcc4 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -212,7 +212,7 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) {
         return "lp64d";
       return "lp64";
     }
-    llvm_unreachable();
+    llvm_unreachable("unhandled XLen");
   }
 
   // 3. Choose a default based on the triple
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index aa94b130cb1240..a3e1ca5d5226c7 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -371,7 +371,10 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   //      value is, are implementation-defined.
   // (Removed in C++20.)
   if (!LangOpts.CPlusPlus) {
-    if (LangOpts.C17)
+    // FIXME: Use correct value for C23.
+    if (LangOpts.C2x)
+      Builder.defineMacro("__STDC_VERSION__", "202000L");
+    else if (LangOpts.C17)
       Builder.defineMacro("__STDC_VERSION__", "201710L");
     else if (LangOpts.C11)
       Builder.defineMacro("__STDC_VERSION__", "201112L");
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index bf19f538647e6c..5a0fa5184e38bf 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1293,7 +1293,7 @@ static bool EvaluateHasIncludeNext(Token &Tok,
 /// integer values.
 static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
                                             Token &Tok, IdentifierInfo *II,
-                                            Preprocessor &PP,
+                                            Preprocessor &PP, bool ExpandArgs,
                                             llvm::function_ref<
                                               int(Token &Tok,
                                                   bool &HasLexedNextTok)> Op) {
@@ -1319,7 +1319,10 @@ static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
   bool SuppressDiagnostic = false;
   while (true) {
     // Parse next token.
-    PP.LexUnexpandedToken(Tok);
+    if (ExpandArgs)
+      PP.Lex(Tok);
+    else
+      PP.LexUnexpandedToken(Tok);
 
 already_lexed:
     switch (Tok.getKind()) {
@@ -1609,21 +1612,21 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     OS << CounterValue++;
     Tok.setKind(tok::numeric_constant);
   } else if (II == Ident__has_feature) {
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
       [this](Token &Tok, bool &HasLexedNextToken) -> int {
         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
                                            diag::err_feature_check_malformed);
         return II && HasFeature(*this, II->getName());
       });
   } else if (II == Ident__has_extension) {
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
       [this](Token &Tok, bool &HasLexedNextToken) -> int {
         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
                                            diag::err_feature_check_malformed);
         return II && HasExtension(*this, II->getName());
       });
   } else if (II == Ident__has_builtin) {
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
       [this](Token &Tok, bool &HasLexedNextToken) -> int {
         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
                                            diag::err_feature_check_malformed);
@@ -1675,12 +1678,12 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
         }
       });
   } else if (II == Ident__is_identifier) {
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
       [](Token &Tok, bool &HasLexedNextToken) -> int {
         return Tok.is(tok::identifier);
       });
   } else if (II == Ident__has_attribute) {
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
       [this](Token &Tok, bool &HasLexedNextToken) -> int {
         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
                                            diag::err_feature_check_malformed);
@@ -1688,7 +1691,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
                                  getTargetInfo(), getLangOpts()) : 0;
       });
   } else if (II == Ident__has_declspec) {
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
       [this](Token &Tok, bool &HasLexedNextToken) -> int {
         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
                                            diag::err_feature_check_malformed);
@@ -1704,8 +1707,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
   } else if (II == Ident__has_cpp_attribute ||
              II == Ident__has_c_attribute) {
     bool IsCXX = II == Ident__has_cpp_attribute;
-    EvaluateFeatureLikeBuiltinMacro(
-        OS, Tok, II, *this, [&](Token &Tok, bool &HasLexedNextToken) -> int {
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
+        [&](Token &Tok, bool &HasLexedNextToken) -> int {
           IdentifierInfo *ScopeII = nullptr;
           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
               Tok, *this, diag::err_feature_check_malformed);
@@ -1719,7 +1722,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
             HasLexedNextToken = true;
           else {
             ScopeII = II;
-            LexUnexpandedToken(Tok);
+            // Lex an expanded token for the attribute name.
+            Lex(Tok);
             II = ExpectFeatureIdentifierInfo(Tok, *this,
                                              diag::err_feature_check_malformed);
           }
@@ -1746,7 +1750,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     Tok.setKind(tok::numeric_constant);
   } else if (II == Ident__has_warning) {
     // The argument should be a parenthesized string literal.
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
       [this](Token &Tok, bool &HasLexedNextToken) -> int {
         std::string WarningName;
         SourceLocation StrStartLoc = Tok.getLocation();
@@ -1777,7 +1781,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     // The argument to this builtin should be an identifier. The
     // builtin evaluates to 1 when that identifier names the module we are
     // currently building.
-    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+    EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
       [this](Token &Tok, bool &HasLexedNextToken) -> int {
         IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
                                        diag::err_expected_id_building_module);
@@ -1837,28 +1841,32 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     return;
   } else if (II == Ident__is_target_arch) {
     EvaluateFeatureLikeBuiltinMacro(
-        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+        OS, Tok, II, *this, false,
+        [this](Token &Tok, bool &HasLexedNextToken) -> int {
           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
               Tok, *this, diag::err_feature_check_malformed);
           return II && isTargetArch(getTargetInfo(), II);
         });
   } else if (II == Ident__is_target_vendor) {
     EvaluateFeatureLikeBuiltinMacro(
-        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+        OS, Tok, II, *this, false,
+        [this](Token &Tok, bool &HasLexedNextToken) -> int {
           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
               Tok, *this, diag::err_feature_check_malformed);
           return II && isTargetVendor(getTargetInfo(), II);
         });
   } else if (II == Ident__is_target_os) {
     EvaluateFeatureLikeBuiltinMacro(
-        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+        OS, Tok, II, *this, false,
+        [this](Token &Tok, bool &HasLexedNextToken) -> int {
           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
               Tok, *this, diag::err_feature_check_malformed);
           return II && isTargetOS(getTargetInfo(), II);
         });
   } else if (II == Ident__is_target_environment) {
     EvaluateFeatureLikeBuiltinMacro(
-        OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+        OS, Tok, II, *this, false,
+        [this](Token &Tok, bool &HasLexedNextToken) -> int {
           IdentifierInfo *II = ExpectFeatureIdentifierInfo(
               Tok, *this, diag::err_feature_check_malformed);
           return II && isTargetEnvironment(getTargetInfo(), II);
diff --git a/clang/test/Preprocessor/c2x.c b/clang/test/Preprocessor/c2x.c
new file mode 100644
index 00000000000000..96fc9273a28685
--- /dev/null
+++ b/clang/test/Preprocessor/c2x.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c2x %s
+// expected-no-diagnostics
+
+// FIXME: Test the correct value once C23 ships.
+_Static_assert(__STDC_VERSION__ > 201710L, "Incorrect __STDC_VERSION__");
diff --git a/clang/test/Preprocessor/has_attribute.c b/clang/test/Preprocessor/has_attribute.c
index 4970dc5904230a..eef168e8791032 100644
--- a/clang/test/Preprocessor/has_attribute.c
+++ b/clang/test/Preprocessor/has_attribute.c
@@ -56,3 +56,11 @@ int has_no_volatile_attribute();
 
 #if __has_cpp_attribute(selectany) // expected-error {{function-like macro '__has_cpp_attribute' is not defined}}
 #endif
+
+// Test that macro expansion of the builtin argument works.
+#define F fallthrough
+
+#if __has_attribute(F)
+int has_fallthrough;
+#endif
+// CHECK: int has_fallthrough;
diff --git a/clang/test/Preprocessor/has_attribute.cpp b/clang/test/Preprocessor/has_attribute.cpp
index fe7d29f15de1af..bf0f9b3bc4a8f0 100644
--- a/clang/test/Preprocessor/has_attribute.cpp
+++ b/clang/test/Preprocessor/has_attribute.cpp
@@ -18,16 +18,6 @@ CXX11(clang::__fallthrough__)
 // CHECK: __gsl__::suppress: 0
 CXX11(__gsl__::suppress)
 
-// We do somewhat support the __clang__ vendor namespace, but it is a
-// predefined macro and thus we encourage users to use _Clang instead.
-// Because of this, we do not support __has_cpp_attribute for that
-// vendor namespace.
-//
-// Note, we can't use CXX11 here because it will expand __clang__ to 1
-// too early.
-// CHECK: 1::fallthrough: 0
-__clang__::fallthrough: __has_cpp_attribute(__clang__::fallthrough)
-
 // CHECK: _Clang::fallthrough: 201603L
 CXX11(_Clang::fallthrough)
 
@@ -70,6 +60,50 @@ CXX11(unlikely)
 // CHECK: noreturn: 200809L
 // CHECK: unlikely: 201803L
 
+namespace PR48462 {
+// Test that macro expansion of the builtin argument works.
+#define C clang
+#define F fallthrough
+#define CF clang::fallthrough
+
+#if __has_cpp_attribute(F)
+int has_fallthrough;
+#endif
+// CHECK: int has_fallthrough;
+
+#if __has_cpp_attribute(C::F)
+int has_clang_falthrough_1;
+#endif
+// CHECK: int has_clang_falthrough_1;
+
+#if __has_cpp_attribute(clang::F)
+int has_clang_falthrough_2;
+#endif
+// CHECK: int has_clang_falthrough_2;
+
+#if __has_cpp_attribute(C::fallthrough)
+int has_clang_falthrough_3;
+#endif
+// CHECK: int has_clang_falthrough_3;
+
+#if __has_cpp_attribute(CF)
+int has_clang_falthrough_4;
+#endif
+// CHECK: int has_clang_falthrough_4;
+
+#define FUNCLIKE1(x) clang::x
+#if __has_cpp_attribute(FUNCLIKE1(fallthrough))
+int funclike_1;
+#endif
+// CHECK: int funclike_1;
+
+#define FUNCLIKE2(x) _Clang::x
+#if __has_cpp_attribute(FUNCLIKE2(fallthrough))
+int funclike_2;
+#endif
+// CHECK: int funclike_2;
+}
+
 // Test for Microsoft __declspec attributes
 
 #define DECLSPEC(x) x: __has_declspec_attribute(x)
@@ -81,3 +115,13 @@ DECLSPEC(__uuid__)
 
 // CHECK: fallthrough: 0
 DECLSPEC(fallthrough)
+
+namespace PR48462 {
+// Test that macro expansion of the builtin argument works.
+#define U uuid
+
+#if __has_declspec_attribute(U)
+int has_uuid;
+#endif
+// CHECK: int has_uuid;
+}
diff --git a/clang/test/Preprocessor/has_attribute_errors.cpp b/clang/test/Preprocessor/has_attribute_errors.cpp
new file mode 100644
index 00000000000000..1fc88d3f926fbd
--- /dev/null
+++ b/clang/test/Preprocessor/has_attribute_errors.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -Eonly -verify %s
+
+// We warn users if they write an attribute like
+// [[__clang__::fallthrough]] because __clang__ is a macro that expands to 1.
+// Instead, we suggest users use [[_Clang::fallthrough]] in this situation.
+// However, because __has_cpp_attribute (and __has_c_attribute) require
+// expanding their argument tokens, __clang__ expands to 1 in the feature test
+// macro as well. We don't currently give users a kind warning in this case,
+// but we previously did not expand macros and so this would return 0. Now that
+// we properly expand macros, users will now get an error about using incorrect
+// syntax.
+
+__has_cpp_attribute(__clang__::fallthrough) // expected-error {{missing ')' after <numeric_constant>}} \
+                                            // expected-note {{to match this '('}} \
+                                            // expected-error {{builtin feature check macro requires a parenthesized identifier}}
+
diff --git a/clang/test/Preprocessor/has_c_attribute.c b/clang/test/Preprocessor/has_c_attribute.c
index 670e42a97926ef..36dd1c80e7802e 100644
--- a/clang/test/Preprocessor/has_c_attribute.c
+++ b/clang/test/Preprocessor/has_c_attribute.c
@@ -33,12 +33,45 @@ C2x(__gnu__::warn_unused_result)
 // CHECK: gnu::__warn_unused_result__: 201904L
 C2x(gnu::__warn_unused_result__)
 
-// We do somewhat support the __clang__ vendor namespace, but it is a
-// predefined macro and thus we encourage users to use _Clang instead.
-// Because of this, we do not support __has_c_attribute for that
-// vendor namespace.
-//
-// Note, we can't use C2x here because it will expand __clang__ to 1
-// too early.
-// CHECK: 1::fallthrough: 0
-__clang__::fallthrough: __has_c_attribute(__clang__::fallthrough)
+// Test that macro expansion of the builtin argument works.
+#define C clang
+#define L likely
+#define CL clang::likely
+#define N nodiscard
+
+#if __has_c_attribute(N)
+int has_nodiscard;
+#endif
+// CHECK: int has_nodiscard;
+
+#if __has_c_attribute(C::L)
+int has_clang_likely_1;
+#endif
+// CHECK: int has_clang_likely_1;
+
+#if __has_c_attribute(clang::L)
+int has_clang_likely_2;
+#endif
+// CHECK: int has_clang_likely_2;
+
+#if __has_c_attribute(C::likely)
+int has_clang_likely_3;
+#endif
+// CHECK: int has_clang_likely_3;
+
+#if __has_c_attribute(CL)
+int has_clang_likely_4;
+#endif
+// CHECK: int has_clang_likely_4;
+
+#define FUNCLIKE1(x) clang::x
+#if __has_c_attribute(FUNCLIKE1(likely))
+int funclike_1;
+#endif
+// CHECK: int funclike_1;
+
+#define FUNCLIKE2(x) _Clang::x
+#if __has_c_attribute(FUNCLIKE2(likely))
+int funclike_2;
+#endif
+// CHECK: int funclike_2;
diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
index 0863850e4f1145..c2e852d941c04a 100644
--- a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
+++ b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
@@ -16,108 +16,134 @@
 #include "gtest/gtest.h"
 #include "tsan_rtl.h"
 
+#if SANITIZER_MAC || !defined(__x86_64__)
+// These tests are currently crashing on Mac:
+// https://reviews.llvm.org/D107911
+// and on ppc64: https://reviews.llvm.org/D110546#3025422
+// due to the way we create thread contexts
+// (but they crashed on Mac with normal pthread_create as well).
+// There must be some difference in thread initialization
+// between normal execution and unit tests.
+#  define TRACE_TEST(SUITE, NAME) TEST(SUITE, DISABLED_##NAME)
+#else
+#  define TRACE_TEST(SUITE, NAME) TEST(SUITE, NAME)
+#endif
+
 namespace __tsan {
 
 using namespace v3;
 
 // We need to run all trace tests in a new thread,
 // so that the thread trace is empty initially.
-static void run_in_thread(void *(*f)(void *), void *arg = nullptr) {
-  pthread_t th;
-  pthread_create(&th, nullptr, f, arg);
-  pthread_join(th, nullptr);
-}
-
-#if SANITIZER_MAC
-// These tests are currently failing on Mac.
-// See https://reviews.llvm.org/D107911 for more details.
-#  define MAYBE_RestoreAccess DISABLED_RestoreAccess
-#  define MAYBE_MemoryAccessSize DISABLED_MemoryAccessSize
-#  define MAYBE_RestoreMutexLock DISABLED_RestoreMutexLock
-#  define MAYBE_MultiPart DISABLED_MultiPart
-#else
-#  define MAYBE_RestoreAccess RestoreAccess
-#  define MAYBE_MemoryAccessSize MemoryAccessSize
-#  define MAYBE_RestoreMutexLock RestoreMutexLock
-#  define MAYBE_MultiPart MultiPart
-#endif
+template <uptr N>
+struct ThreadArray {
+  ThreadArray() {
+    for (auto *&thr : threads) {
+      thr = static_cast<ThreadState *>(
+          MmapOrDie(sizeof(ThreadState), "ThreadState"));
+      Tid tid = ThreadCreate(cur_thread(), 0, 0, true);
+      Processor *proc = ProcCreate();
+      ProcWire(proc, thr);
+      ThreadStart(thr, tid, 0, ThreadType::Fiber);
+    }
+  }
 
-TEST(Trace, MAYBE_RestoreAccess) {
-  struct Thread {
-    static void *Func(void *arg) {
-      // A basic test with some function entry/exit events,
-      // some mutex lock/unlock events and some other distracting
-      // memory events.
-      ThreadState *thr = cur_thread();
-      TraceFunc(thr, 0x1000);
-      TraceFunc(thr, 0x1001);
-      TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
-      TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001);
-      TraceMutexUnlock(thr, 0x5000);
-      TraceFunc(thr);
-      CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead));
-      TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002);
-      TraceFunc(thr, 0x1002);
-      CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead));
-      // This is the access we want to find.
-      // The previous one is equivalent, but RestoreStack must prefer
-      // the last of the matchig accesses.
-      CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
-      Lock lock1(&ctx->slot_mtx);
-      ThreadRegistryLock lock2(&ctx->thread_registry);
-      VarSizeStackTrace stk;
-      MutexSet mset;
-      uptr tag = kExternalTagNone;
-      bool res =
-          RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid,
-                       thr->epoch, 0x3000, 8, kAccessRead, &stk, &mset, &tag);
-      CHECK(res);
-      CHECK_EQ(stk.size, 3);
-      CHECK_EQ(stk.trace[0], 0x1000);
-      CHECK_EQ(stk.trace[1], 0x1002);
-      CHECK_EQ(stk.trace[2], 0x2002);
-      CHECK_EQ(mset.Size(), 2);
-      CHECK_EQ(mset.Get(0).addr, 0x5001);
-      CHECK_EQ(mset.Get(0).stack_id, 0x6001);
-      CHECK_EQ(mset.Get(0).write, true);
-      CHECK_EQ(mset.Get(1).addr, 0x5002);
-      CHECK_EQ(mset.Get(1).stack_id, 0x6002);
-      CHECK_EQ(mset.Get(1).write, false);
-      CHECK_EQ(tag, kExternalTagNone);
-      return nullptr;
+  ~ThreadArray() {
+    for (uptr i = 0; i < N; i++) {
+      if (threads[i])
+        Finish(i);
     }
-  };
-  run_in_thread(Thread::Func);
+  }
+
+  void Finish(uptr i) {
+    auto *thr = threads[i];
+    threads[i] = nullptr;
+    Processor *proc = thr->proc();
+    ThreadFinish(thr);
+    ProcUnwire(proc, thr);
+    ProcDestroy(proc);
+    UnmapOrDie(thr, sizeof(ThreadState));
+  }
+
+  ThreadState *threads[N];
+  ThreadState *operator[](uptr i) { return threads[i]; }
+  ThreadState *operator->() { return threads[0]; }
+  operator ThreadState *() { return threads[0]; }
+};
+
+TRACE_TEST(Trace, RestoreAccess) {
+  // A basic test with some function entry/exit events,
+  // some mutex lock/unlock events and some other distracting
+  // memory events.
+  ThreadArray<1> thr;
+  TraceFunc(thr, 0x1000);
+  TraceFunc(thr, 0x1001);
+  TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
+  TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001);
+  TraceMutexUnlock(thr, 0x5000);
+  TraceFunc(thr);
+  CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead));
+  TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002);
+  TraceFunc(thr, 0x1002);
+  CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead));
+  // This is the access we want to find.
+  // The previous one is equivalent, but RestoreStack must prefer
+  // the last of the matching accesses.
+  CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
+  Lock lock1(&ctx->slot_mtx);
+  ThreadRegistryLock lock2(&ctx->thread_registry);
+  VarSizeStackTrace stk;
+  MutexSet mset;
+  uptr tag = kExternalTagNone;
+  bool res =
+      RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch,
+                   0x3000, 8, kAccessRead, &stk, &mset, &tag);
+  CHECK(res);
+  CHECK_EQ(stk.size, 3);
+  CHECK_EQ(stk.trace[0], 0x1000);
+  CHECK_EQ(stk.trace[1], 0x1002);
+  CHECK_EQ(stk.trace[2], 0x2002);
+  CHECK_EQ(mset.Size(), 2);
+  CHECK_EQ(mset.Get(0).addr, 0x5001);
+  CHECK_EQ(mset.Get(0).stack_id, 0x6001);
+  CHECK_EQ(mset.Get(0).write, true);
+  CHECK_EQ(mset.Get(1).addr, 0x5002);
+  CHECK_EQ(mset.Get(1).stack_id, 0x6002);
+  CHECK_EQ(mset.Get(1).write, false);
+  CHECK_EQ(tag, kExternalTagNone);
 }
 
-TEST(Trace, MAYBE_MemoryAccessSize) {
-  struct Thread {
-    struct Params {
-      uptr access_size, offset, size;
-      bool res;
-      int type;
-    };
-    static void *Func(void *arg) {
-      // Test tracing and matching of accesses of different sizes.
-      const Params *params = static_cast<Params *>(arg);
+TRACE_TEST(Trace, MemoryAccessSize) {
+  // Test tracing and matching of accesses of different sizes.
+  struct Params {
+    uptr access_size, offset, size;
+    bool res;
+  };
+  Params tests[] = {
+      {1, 0, 1, true},  {4, 0, 2, true},
+      {4, 2, 2, true},  {8, 3, 1, true},
+      {2, 1, 1, true},  {1, 1, 1, false},
+      {8, 5, 4, false}, {4, static_cast<uptr>(-1l), 4, false},
+  };
+  for (auto params : tests) {
+    for (int type = 0; type < 3; type++) {
+      ThreadArray<1> thr;
       Printf("access_size=%zu, offset=%zu, size=%zu, res=%d, type=%d\n",
-             params->access_size, params->offset, params->size, params->res,
-             params->type);
-      ThreadState *thr = cur_thread();
+             params.access_size, params.offset, params.size, params.res, type);
       TraceFunc(thr, 0x1000);
-      switch (params->type) {
+      switch (type) {
         case 0:
           // This should emit compressed event.
-          CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, params->access_size,
+          CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, params.access_size,
                                      kAccessRead));
           break;
         case 1:
           // This should emit full event.
-          CHECK(TryTraceMemoryAccess(thr, 0x2000000, 0x3000,
-                                     params->access_size, kAccessRead));
+          CHECK(TryTraceMemoryAccess(thr, 0x2000000, 0x3000, params.access_size,
+                                     kAccessRead));
           break;
         case 2:
-          TraceMemoryAccessRange(thr, 0x2000000, 0x3000, params->access_size,
+          TraceMemoryAccessRange(thr, 0x2000000, 0x3000, params.access_size,
                                  kAccessRead);
           break;
       }
@@ -127,105 +153,82 @@ TEST(Trace, MAYBE_MemoryAccessSize) {
       MutexSet mset;
       uptr tag = kExternalTagNone;
       bool res = RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid,
-                              thr->epoch, 0x3000 + params->offset, params->size,
+                              thr->epoch, 0x3000 + params.offset, params.size,
                               kAccessRead, &stk, &mset, &tag);
-      CHECK_EQ(res, params->res);
-      if (params->res) {
+      CHECK_EQ(res, params.res);
+      if (params.res) {
         CHECK_EQ(stk.size, 2);
         CHECK_EQ(stk.trace[0], 0x1000);
-        CHECK_EQ(stk.trace[1], params->type ? 0x2000000 : 0x2000);
+        CHECK_EQ(stk.trace[1], type ? 0x2000000 : 0x2000);
       }
-      return nullptr;
     }
-  };
-  Thread::Params tests[] = {
-      {1, 0, 1, true, 0},  {4, 0, 2, true, 0},
-      {4, 2, 2, true, 0},  {8, 3, 1, true, 0},
-      {2, 1, 1, true, 0},  {1, 1, 1, false, 0},
-      {8, 5, 4, false, 0}, {4, static_cast<uptr>(-1l), 4, false, 0},
-  };
-  for (auto params : tests) {
-    for (params.type = 0; params.type < 3; params.type++)
-      run_in_thread(Thread::Func, &params);
   }
 }
 
-TEST(Trace, MAYBE_RestoreMutexLock) {
-  struct Thread {
-    static void *Func(void *arg) {
-      // Check of restoration of a mutex lock event.
-      ThreadState *thr = cur_thread();
-      TraceFunc(thr, 0x1000);
-      TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
-      TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
-      TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002);
-      Lock lock1(&ctx->slot_mtx);
-      ThreadRegistryLock lock2(&ctx->thread_registry);
-      VarSizeStackTrace stk;
-      MutexSet mset;
-      uptr tag = kExternalTagNone;
-      bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid,
-                              thr->epoch, 0x5001, 0, 0, &stk, &mset, &tag);
-      CHECK(res);
-      CHECK_EQ(stk.size, 2);
-      CHECK_EQ(stk.trace[0], 0x1000);
-      CHECK_EQ(stk.trace[1], 0x4002);
-      CHECK_EQ(mset.Size(), 2);
-      CHECK_EQ(mset.Get(0).addr, 0x5000);
-      CHECK_EQ(mset.Get(0).stack_id, 0x6000);
-      CHECK_EQ(mset.Get(0).write, true);
-      CHECK_EQ(mset.Get(1).addr, 0x5001);
-      CHECK_EQ(mset.Get(1).stack_id, 0x6001);
-      CHECK_EQ(mset.Get(1).write, false);
-      return nullptr;
-    }
-  };
-  run_in_thread(Thread::Func);
+TRACE_TEST(Trace, RestoreMutexLock) {
+  // Check restoration of a mutex lock event.
+  ThreadArray<1> thr;
+  TraceFunc(thr, 0x1000);
+  TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
+  TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
+  TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002);
+  Lock lock1(&ctx->slot_mtx);
+  ThreadRegistryLock lock2(&ctx->thread_registry);
+  VarSizeStackTrace stk;
+  MutexSet mset;
+  uptr tag = kExternalTagNone;
+  bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid, thr->epoch,
+                          0x5001, 0, 0, &stk, &mset, &tag);
+  CHECK(res);
+  CHECK_EQ(stk.size, 2);
+  CHECK_EQ(stk.trace[0], 0x1000);
+  CHECK_EQ(stk.trace[1], 0x4002);
+  CHECK_EQ(mset.Size(), 2);
+  CHECK_EQ(mset.Get(0).addr, 0x5000);
+  CHECK_EQ(mset.Get(0).stack_id, 0x6000);
+  CHECK_EQ(mset.Get(0).write, true);
+  CHECK_EQ(mset.Get(1).addr, 0x5001);
+  CHECK_EQ(mset.Get(1).stack_id, 0x6001);
+  CHECK_EQ(mset.Get(1).write, false);
 }
 
-TEST(Trace, MAYBE_MultiPart) {
-  struct Thread {
-    static void *Func(void *arg) {
-      // Check replay of a trace with multiple parts.
-      ThreadState *thr = cur_thread();
-      TraceFunc(thr, 0x1000);
-      TraceFunc(thr, 0x2000);
-      TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
-      const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event);
-      for (uptr i = 0; i < kEvents; i++) {
-        TraceFunc(thr, 0x3000);
-        TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002);
-        TraceMutexUnlock(thr, 0x5002);
-        TraceFunc(thr);
-      }
-      TraceFunc(thr, 0x4000);
-      TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
-      CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
-      Lock lock1(&ctx->slot_mtx);
-      ThreadRegistryLock lock2(&ctx->thread_registry);
-      VarSizeStackTrace stk;
-      MutexSet mset;
-      uptr tag = kExternalTagNone;
-      bool res =
-          RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid,
-                       thr->epoch, 0x3000, 8, kAccessRead, &stk, &mset, &tag);
-      CHECK(res);
-      CHECK_EQ(stk.size, 4);
-      CHECK_EQ(stk.trace[0], 0x1000);
-      CHECK_EQ(stk.trace[1], 0x2000);
-      CHECK_EQ(stk.trace[2], 0x4000);
-      CHECK_EQ(stk.trace[3], 0x2002);
-      CHECK_EQ(mset.Size(), 2);
-      CHECK_EQ(mset.Get(0).addr, 0x5000);
-      CHECK_EQ(mset.Get(0).stack_id, 0x6000);
-      CHECK_EQ(mset.Get(0).write, true);
-      CHECK_EQ(mset.Get(1).addr, 0x5001);
-      CHECK_EQ(mset.Get(1).stack_id, 0x6001);
-      CHECK_EQ(mset.Get(1).write, false);
-      return nullptr;
-    }
-  };
-  run_in_thread(Thread::Func);
+TRACE_TEST(Trace, MultiPart) {
+  // Check replay of a trace with multiple parts.
+  ThreadArray<1> thr;
+  TraceFunc(thr, 0x1000);
+  TraceFunc(thr, 0x2000);
+  TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
+  const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event);
+  for (uptr i = 0; i < kEvents; i++) {
+    TraceFunc(thr, 0x3000);
+    TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002);
+    TraceMutexUnlock(thr, 0x5002);
+    TraceFunc(thr);
+  }
+  TraceFunc(thr, 0x4000);
+  TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
+  CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
+  Lock lock1(&ctx->slot_mtx);
+  ThreadRegistryLock lock2(&ctx->thread_registry);
+  VarSizeStackTrace stk;
+  MutexSet mset;
+  uptr tag = kExternalTagNone;
+  bool res =
+      RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch,
+                   0x3000, 8, kAccessRead, &stk, &mset, &tag);
+  CHECK(res);
+  CHECK_EQ(stk.size, 4);
+  CHECK_EQ(stk.trace[0], 0x1000);
+  CHECK_EQ(stk.trace[1], 0x2000);
+  CHECK_EQ(stk.trace[2], 0x4000);
+  CHECK_EQ(stk.trace[3], 0x2002);
+  CHECK_EQ(mset.Size(), 2);
+  CHECK_EQ(mset.Get(0).addr, 0x5000);
+  CHECK_EQ(mset.Get(0).stack_id, 0x6000);
+  CHECK_EQ(mset.Get(0).write, true);
+  CHECK_EQ(mset.Get(1).addr, 0x5001);
+  CHECK_EQ(mset.Get(1).stack_id, 0x6001);
+  CHECK_EQ(mset.Get(1).write, false);
 }
 
 }  // namespace __tsan
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 88f5740765de6a..5e17f7a3b977dc 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -837,6 +837,54 @@ void OmpStructureChecker::Leave(const parser::OmpEndSectionsDirective &x) {
   }
 }
 
+// Checks the restrictions shared by the THREADPRIVATE and DECLARE TARGET
+// directives for each variable designator in objList: the variable must not
+// be an element of a common block, must either have the SAVE attribute or be
+// declared in the scope of a main program or module, and must not appear in
+// an EQUIVALENCE statement. Bare parser::Name objects (common block names)
+// are intentionally left unchecked here.
+void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar(
+    const parser::OmpObjectList &objList) {
+  for (const auto &ompObject : objList.v) {
+    std::visit(
+        common::visitors{
+            [&](const parser::Designator &) {
+              if (const auto *name{parser::Unwrap<parser::Name>(ompObject)}) {
+                // Unresolved names carry no symbol (presumably diagnosed
+                // during name resolution); skip them rather than
+                // dereferencing a null pointer below.
+                if (!name->symbol) {
+                  return;
+                }
+                const auto &scope{context_.FindScope(name->symbol->name())};
+                if (FindCommonBlockContaining(*name->symbol)) {
+                  context_.Say(name->source,
+                      "A variable in a %s directive cannot be an element of a "
+                      "common block"_err_en_US,
+                      ContextDirectiveAsFortran());
+                } else if (!IsSave(*name->symbol) &&
+                    scope.kind() != Scope::Kind::MainProgram &&
+                    scope.kind() != Scope::Kind::Module) {
+                  context_.Say(name->source,
+                      "A variable that appears in a %s directive must be "
+                      "declared in the scope of a module or have the SAVE "
+                      "attribute, either explicitly or implicitly"_err_en_US,
+                      ContextDirectiveAsFortran());
+                }
+                if (FindEquivalenceSet(*name->symbol)) {
+                  context_.Say(name->source,
+                      "A variable in a %s directive cannot appear in an "
+                      "EQUIVALENCE statement"_err_en_US,
+                      ContextDirectiveAsFortran());
+                }
+              }
+            },
+            [&](const parser::Name &) {}, // common block
+        },
+        ompObject.u);
+  }
+}
+
 void OmpStructureChecker::Enter(const parser::OpenMPThreadprivate &c) {
   const auto &dir{std::get<parser::Verbatim>(c.t)};
   PushContextAndClauseSets(
@@ -847,6 +895,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPThreadprivate &c) {
   const auto &dir{std::get<parser::Verbatim>(c.t)};
   const auto &objectList{std::get<parser::OmpObjectList>(c.t)};
   CheckIsVarPartOfAnotherVar(dir.source, objectList);
+  CheckThreadprivateOrDeclareTargetVar(objectList);
   dirContext_.pop_back();
 }
 
@@ -892,7 +941,25 @@ void OmpStructureChecker::Enter(const parser::OpenMPDeclareTargetConstruct &x) {
   }
 }
 
-void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &) {
+void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) {
+  const auto &dir{std::get<parser::Verbatim>(x.t)};
+  const auto &spec{std::get<parser::OmpDeclareTargetSpecifier>(x.t)};
+  if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
+    CheckIsVarPartOfAnotherVar(dir.source, *objectList);
+    CheckThreadprivateOrDeclareTargetVar(*objectList);
+  } else if (const auto *clauseList{
+                 parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
+    for (const auto &clause : clauseList->v) {
+      if (const auto *toClause{std::get_if<parser::OmpClause::To>(&clause.u)}) {
+        CheckIsVarPartOfAnotherVar(dir.source, toClause->v);
+        CheckThreadprivateOrDeclareTargetVar(toClause->v);
+      } else if (const auto *linkClause{
+                     std::get_if<parser::OmpClause::Link>(&clause.u)}) {
+        CheckIsVarPartOfAnotherVar(dir.source, linkClause->v);
+        CheckThreadprivateOrDeclareTargetVar(linkClause->v);
+      }
+    }
+  }
   dirContext_.pop_back();
 }
 
@@ -1635,7 +1702,8 @@ bool OmpStructureChecker::IsDataRefTypeParamInquiry(
 void OmpStructureChecker::CheckIsVarPartOfAnotherVar(
     const parser::CharBlock &source, const parser::OmpObjectList &objList) {
   OmpDirectiveSet nonPartialVarSet{llvm::omp::Directive::OMPD_allocate,
-      llvm::omp::Directive::OMPD_threadprivate};
+      llvm::omp::Directive::OMPD_threadprivate,
+      llvm::omp::Directive::OMPD_declare_target};
   for (const auto &ompObject : objList.v) {
     std::visit(
         common::visitors{
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index d82bf0032a2439..bf98f360ed58b4 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -211,6 +211,8 @@ class OmpStructureChecker
   bool IsDataRefTypeParamInquiry(const parser::DataRef *dataRef);
   void CheckIsVarPartOfAnotherVar(
       const parser::CharBlock &source, const parser::OmpObjectList &objList);
+  void CheckThreadprivateOrDeclareTargetVar(
+      const parser::OmpObjectList &objList);
   void CheckIntentInPointer(
       const parser::OmpObjectList &, const llvm::omp::Clause);
   void GetSymbolsInObjectList(const parser::OmpObjectList &, SymbolSourceMap &);
diff --git a/flang/test/Semantics/omp-declarative-directive.f90 b/flang/test/Semantics/omp-declarative-directive.f90
index 15744d9797935e..b9b39a30968745 100644
--- a/flang/test/Semantics/omp-declarative-directive.f90
+++ b/flang/test/Semantics/omp-declarative-directive.f90
@@ -44,13 +44,20 @@ module m2
 contains
   subroutine foo
     !$omp declare target
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
     !$omp declare target (foo, N, M)
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
     !$omp declare target to(Q, S) link(R)
     !ERROR: MAP clause is not allowed on the DECLARE TARGET directive
     !$omp declare target map(from:Q)
     integer, parameter :: N=10000, M=1024
     integer :: i
     real :: Q(N, N), R(N,M), S(M,M)
+    !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
     !$omp threadprivate(i)
   end subroutine foo
 end module m2
diff --git a/flang/test/Semantics/omp-declare-target01.f90 b/flang/test/Semantics/omp-declare-target01.f90
new file mode 100644
index 00000000000000..972468fd9d0655
--- /dev/null
+++ b/flang/test/Semantics/omp-declare-target01.f90
@@ -0,0 +1,113 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp
+! OpenMP Version 5.1
+! Check OpenMP construct validity for the following directives:
+! 2.14.7 Declare Target Directive
+
+module declare_target01
+  use omp_lib
+  type my_type(kind_param, len_param)
+    integer, KIND :: kind_param
+    integer, LEN :: len_param
+    integer :: t_i
+    integer :: t_arr(10)
+  end type my_type
+
+  type(my_type(2, 4)) :: my_var, my_var2
+  integer :: arr(10), arr2(10)
+  integer(kind=4) :: x, x2
+  character(len=32) :: w, w2
+  integer, dimension(:), allocatable :: y, y2
+
+  !$omp declare target (my_var)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target (my_var%t_i)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target (my_var%t_arr)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target (my_var%kind_param)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target (my_var%len_param)
+
+  !$omp declare target (arr)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target (arr(1))
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target (arr(1:2))
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target (x%KIND)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target (w%LEN)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target (y%KIND)
+
+  !$omp declare target to (my_var)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (my_var%t_i)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (my_var%t_arr)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (my_var%kind_param)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (my_var%len_param)
+
+  !$omp declare target to (arr)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (arr(1))
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (arr(1:2))
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (x%KIND)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (w%LEN)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target to (y%KIND)
+
+  !$omp declare target link (my_var2)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (my_var2%t_i)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (my_var2%t_arr)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (my_var2%kind_param)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (my_var2%len_param)
+
+  !$omp declare target link (arr2)
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (arr2(1))
+
+  !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (arr2(1:2))
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (x2%KIND)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (w2%LEN)
+
+  !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive
+  !$omp declare target link (y2%KIND)
+end
diff --git a/flang/test/Semantics/omp-declare-target02.f90 b/flang/test/Semantics/omp-declare-target02.f90
new file mode 100644
index 00000000000000..2ef5df51c6727b
--- /dev/null
+++ b/flang/test/Semantics/omp-declare-target02.f90
@@ -0,0 +1,176 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp
+! OpenMP Version 5.1
+! Check OpenMP construct validity for the following directives:
+! 2.14.7 Declare Target Directive
+
+program declare_target02
+  integer :: arr1(10), arr1_to(10), arr1_link(10)
+  common /blk1/ a1, a1_to, a1_link
+  real, save :: eq_a, eq_b, eq_c, eq_d
+
+
+  !$omp declare target (arr1)
+
+  !$omp declare target (blk1)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target (a1)
+
+  !$omp declare target to (arr1_to)
+
+  !$omp declare target to (blk1_to)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target to (a1_to)
+
+  !$omp declare target link (arr1_link)
+
+  !$omp declare target link (blk1_link)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target link (a1_link)
+
+  equivalence(eq_a, eq_b)
+  !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement
+  !$omp declare target (eq_a)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement
+  !$omp declare target to (eq_a)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement
+  !$omp declare target link (eq_b)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement
+  !$omp declare target (eq_c)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement
+  !$omp declare target to (eq_c)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement
+  !$omp declare target link (eq_d)
+  equivalence(eq_c, eq_d)
+
+contains
+  subroutine func()
+    integer :: arr2(10), arr2_to(10), arr2_link(10)
+    integer, save :: arr3(10), arr3_to(10), arr3_link(10)
+    common /blk2/ a2, a2_to, a2_link
+    common /blk3/ a3, a3_to, a3_link
+    save /blk3/
+
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target (arr2)
+
+    !$omp declare target (arr3)
+
+    !ERROR: Implicitly typed local entity 'blk2' not allowed in specification expression
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target (blk2)
+
+    !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+    !$omp declare target (a2)
+
+    !ERROR: Implicitly typed local entity 'blk3' not allowed in specification expression
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target (blk3)
+
+    !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+    !$omp declare target (a3)
+
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target to (arr2_to)
+
+    !$omp declare target to (arr3_to)
+
+    !ERROR: Implicitly typed local entity 'blk2_to' not allowed in specification expression
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target to (blk2_to)
+
+    !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+    !$omp declare target to (a2_to)
+
+    !ERROR: Implicitly typed local entity 'blk3_to' not allowed in specification expression
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target to (blk3_to)
+
+    !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+    !$omp declare target to (a3_to)
+
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target link (arr2_link)
+
+    !$omp declare target link (arr3_link)
+
+    !ERROR: Implicitly typed local entity 'blk2_link' not allowed in specification expression
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target link (blk2_link)
+
+    !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+    !$omp declare target link (a2_link)
+
+    !ERROR: Implicitly typed local entity 'blk3_link' not allowed in specification expression
+    !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp declare target link (blk3_link)
+
+    !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+    !$omp declare target link (a3_link)
+  end
+end
+
+module mod4
+  integer :: arr4(10), arr4_to(10), arr4_link(10)
+  common /blk4/ a4, a4_to, a4_link
+
+  !$omp declare target (arr4)
+
+  !$omp declare target (blk4)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target (a4)
+
+  !$omp declare target to (arr4_to)
+
+  !$omp declare target to (blk4_to)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target to (a4_to)
+
+  !$omp declare target link (arr4_link)
+
+  !$omp declare target link (blk4_link)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target link (a4_link)
+end
+
+subroutine func5()
+  integer :: arr5(10), arr5_to(10), arr5_link(10)
+  common /blk5/ a5, a5_to, a5_link
+
+  !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp declare target (arr5)
+
+  !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp declare target (blk5)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target (a5)
+
+  !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp declare target to (arr5_to)
+
+  !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp declare target to (blk5_to)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target to (a5_to)
+
+  !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp declare target link (arr5_link)
+
+  !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp declare target link (blk5_link)
+
+  !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block
+  !$omp declare target link (a5_link)
+end
diff --git a/flang/test/Semantics/omp-threadprivate02.f90 b/flang/test/Semantics/omp-threadprivate02.f90
new file mode 100644
index 00000000000000..4a4034908f40ae
--- /dev/null
+++ b/flang/test/Semantics/omp-threadprivate02.f90
@@ -0,0 +1,89 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp
+! OpenMP Version 5.1
+! Check OpenMP construct validity for the following directives:
+! 2.21.2 Threadprivate Directive
+
+program threadprivate02
+  integer :: arr1(10)
+  common /blk1/ a1
+  real, save :: eq_a, eq_b, eq_c, eq_d
+
+  !$omp threadprivate(arr1)
+
+  !$omp threadprivate(/blk1/)
+
+  !$omp threadprivate(blk1)
+
+  !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block
+  !$omp threadprivate(a1)
+
+  equivalence(eq_a, eq_b)
+  !ERROR: A variable in a THREADPRIVATE directive cannot appear in an EQUIVALENCE statement
+  !$omp threadprivate(eq_a)
+
+  !ERROR: A variable in a THREADPRIVATE directive cannot appear in an EQUIVALENCE statement
+  !$omp threadprivate(eq_c)
+  equivalence(eq_c, eq_d)
+
+contains
+  subroutine func()
+    integer :: arr2(10)
+    integer, save :: arr3(10)
+    common /blk2/ a2
+    common /blk3/ a3
+    save /blk3/
+
+    !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp threadprivate(arr2)
+
+    !$omp threadprivate(arr3)
+
+    !$omp threadprivate(/blk2/)
+
+    !ERROR: Implicitly typed local entity 'blk2' not allowed in specification expression
+    !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp threadprivate(blk2)
+
+    !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block
+    !$omp threadprivate(a2)
+
+    !$omp threadprivate(/blk3/)
+
+    !ERROR: Implicitly typed local entity 'blk3' not allowed in specification expression
+    !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+    !$omp threadprivate(blk3)
+
+    !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block
+    !$omp threadprivate(a3)
+  end
+end
+
+module mod4
+  integer :: arr4(10)
+  common /blk4/ a4
+
+  !$omp threadprivate(arr4)
+
+  !$omp threadprivate(/blk4/)
+
+  !$omp threadprivate(blk4)
+
+  !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block
+  !$omp threadprivate(a4)
+end
+
+subroutine func5()
+  integer :: arr5(10)
+  common /blk5/ a5
+
+  !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp threadprivate(arr5)
+
+  !$omp threadprivate(/blk5/)
+
+  !ERROR: A variable that appears in a THREADPRIVATE directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly
+  !$omp threadprivate(blk5)
+
+  !ERROR: A variable in a THREADPRIVATE directive cannot be an element of a common block
+  !$omp threadprivate(a5)
+end
diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h
index 44b8c395c89e2e..e464d29afdacac 100644
--- a/llvm/include/llvm/IR/ConstantRange.h
+++ b/llvm/include/llvm/IR/ConstantRange.h
@@ -383,6 +383,11 @@ class LLVM_NODISCARD ConstantRange {
   /// treating both this and \p Other as unsigned ranges.
   ConstantRange multiply(const ConstantRange &Other) const;
 
+  /// Return range of possible values for a signed multiplication of this and
+  /// \p Other. However, if overflow is possible, always return a full range
+  /// rather than trying to determine a more precise result.
+  ConstantRange smul_fast(const ConstantRange &Other) const;
+
   /// Return a new range representing the possible values resulting
   /// from a signed maximum of a value in this range and a value in \p Other.
   ConstantRange smax(const ConstantRange &Other) const;
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 3129da27053f29..865db9f798326a 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1302,7 +1302,7 @@ AliasResult BasicAAResult::aliasGEP(
             computeConstantRange(Var.Val.V, true, &AC, Var.CxtI));
         if (!R.isFullSet() && !R.isEmptySet())
           VarIndexRange = R.sextOrTrunc(Var.Scale.getBitWidth())
-                              .multiply(ConstantRange(Var.Scale));
+                              .smul_fast(ConstantRange(Var.Scale));
       } else if (DecompGEP1.VarIndices.size() == 2) {
         // VarIndex = Scale*V0 + (-Scale)*V1.
         // If V0 != V1 then abs(VarIndex) >= abs(Scale).
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index d8b4262a811425..6877a5d278ac54 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -1054,6 +1054,25 @@ ConstantRange::multiply(const ConstantRange &Other) const {
   return UR.isSizeStrictlySmallerThan(SR) ? UR : SR;
 }
 
+ConstantRange ConstantRange::smul_fast(const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return getEmpty();
+
+  APInt Min = getSignedMin();
+  APInt Max = getSignedMax();
+  APInt OtherMin = Other.getSignedMin();
+  APInt OtherMax = Other.getSignedMax();
+
+  bool O1, O2, O3, O4;
+  auto Muls = {Min.smul_ov(OtherMin, O1), Min.smul_ov(OtherMax, O2),
+               Max.smul_ov(OtherMin, O3), Max.smul_ov(OtherMax, O4)};
+  if (O1 || O2 || O3 || O4)
+    return getFull();
+
+  auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
+  return getNonEmpty(std::min(Muls, Compare), std::max(Muls, Compare) + 1);
+}
+
 ConstantRange
 ConstantRange::smax(const ConstantRange &Other) const {
   // X smax Y is: range(smax(X_smin, Y_smin),
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 33ce7df2bddade..ecd89b64b8ea35 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1216,6 +1216,18 @@ def fpimm0 : FPImmLeaf<fAny, [{
   return Imm.isExactlyValue(+0.0);
 }]>;
 
+def fpimm_half : FPImmLeaf<fAny, [{
+  return Imm.isExactlyValue(+0.5);
+}]>;
+
+def fpimm_one : FPImmLeaf<fAny, [{
+  return Imm.isExactlyValue(+1.0);
+}]>;
+
+def fpimm_two : FPImmLeaf<fAny, [{
+  return Imm.isExactlyValue(+2.0);
+}]>;
+
 def gi_fpimm16 : GICustomOperandRenderer<"renderFPImm16">,
   GISDNodeXFormEquiv<fpimm16XForm>;
 def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7bd891a2acdc5b..cb83f787a59845 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -405,14 +405,34 @@ let Predicates = [HasSVEorStreamingSVE] in {
   defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe",  int_aarch64_sve_frecpe_x>;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
 
-  defm FADD_ZPmI    : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>;
-  defm FSUB_ZPmI    : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>;
-  defm FMUL_ZPmI    : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>;
-  defm FSUBR_ZPmI   : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>;
-  defm FMAXNM_ZPmI  : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>;
-  defm FMINNM_ZPmI  : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>;
-  defm FMAX_ZPmI    : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
-  defm FMIN_ZPmI    : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
+  defm FADD_ZPmI    : sve_fp_2op_i_p_zds<0b000, "fadd", "FADD_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>;
+  defm FSUB_ZPmI    : sve_fp_2op_i_p_zds<0b001, "fsub", "FSUB_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>;
+  defm FMUL_ZPmI    : sve_fp_2op_i_p_zds<0b010, "fmul", "FMUL_ZPZI", sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>;
+  defm FSUBR_ZPmI   : sve_fp_2op_i_p_zds<0b011, "fsubr", "FSUBR_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>;
+  defm FMAXNM_ZPmI  : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>;
+  defm FMINNM_ZPmI  : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
+  defm FMAX_ZPmI    : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
+  defm FMIN_ZPmI    : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
+
+  defm FADD_ZPZI    : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fadd_p>;
+  defm FSUB_ZPZI    : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsub_p>;
+  defm FMUL_ZPZI    : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, AArch64fmul_p>;
+  defm FSUBR_ZPZI   : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one>;
+  defm FMAXNM_ZPZI  : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmaxnm_p>;
+  defm FMINNM_ZPZI  : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fminnm_p>;
+  defm FMAX_ZPZI    : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmax_p>;
+  defm FMIN_ZPZI    : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmin_p>;
+
+  let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+    defm FADD_ZPZI    : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>;
+    defm FSUB_ZPZI    : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>;
+    defm FMUL_ZPZI    : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>;
+    defm FSUBR_ZPZI   : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>;
+    defm FMAXNM_ZPZI  : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>;
+    defm FMINNM_ZPZI  : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
+    defm FMAX_ZPZI    : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
+    defm FMIN_ZPZI    : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
+  }
 
   defm FADD_ZPmZ   : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
   defm FSUB_ZPmZ   : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 490e08a89471cc..fcb96c3b19db95 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -490,6 +490,21 @@ class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
 : Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
       (inst $Rn, i32:$imm)>;
 
+class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
+                          ValueType pt, ValueType it,
+                          FPImmLeaf immL, int imm,
+                          Instruction inst>
+: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))),
+      (inst $Pg, $Zs1, imm)>;
+
+class SVE_2_Op_Fp_Imm_Pat_Zero<ValueType vt, SDPatternOperator op,
+                              ValueType pt, ValueType it,
+                              FPImmLeaf immL, int imm,
+                              Instruction inst>
+: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)),
+                      (vt (AArch64dup (it immL))))),
+      (inst $Pg, $Zs1, imm)>;
+
 //
 // Pseudo -> Instruction mappings
 //
@@ -1745,10 +1760,19 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
-  def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
-  def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
-  def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> { // Defines the _H/_S/_D instructions and, per element size, two selection patterns for op (one per FP immediate A and B).
+  let DestructiveInstType = DestructiveBinaryImm in { // Applies to the three instruction defs below only; the patterns are outside this let.
+  def _H : SVEPseudo2Instr<Ps # _H, 1>, sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; // sz=0b01 on ZPR16; SVEPseudo2Instr is keyed by the name Ps # _H.
+  def _S : SVEPseudo2Instr<Ps # _S, 1>, sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; // sz=0b10 on ZPR32; SVEPseudo2Instr is keyed by the name Ps # _S.
+  def _D : SVEPseudo2Instr<Ps # _D, 1>, sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; // sz=0b11 on ZPR64; SVEPseudo2Instr is keyed by the name Ps # _D.
+  }
+
+  def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_H")>; // f16 lanes: immediate A is emitted as operand value 0.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_H")>; // f16 lanes: immediate B is emitted as operand value 1.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_S")>; // f32 lanes: A -> 0.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_S")>; // f32 lanes: B -> 1.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_D")>; // f64 lanes: A -> 0.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_D")>; // f64 lanes: B -> 1.
 }
 
 class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
@@ -1846,6 +1870,40 @@ multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
             (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
 }
 
+multiclass sve_fp_2op_i_p_zds_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator ir_op = null_frag> { // Defines _UNDEF_{H,S,D} pseudos and patterns selecting ir_op with FP immediate A or B onto them; ir_op defaults to null_frag.
+  def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesUndef>; // Pseudo on ZPR16, tied to the name NAME # _H, created with FalseLanesUndef.
+  def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesUndef>; // Pseudo on ZPR32, tied to the name NAME # _S, created with FalseLanesUndef.
+  def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesUndef>; // Pseudo on ZPR64, tied to the name NAME # _D, created with FalseLanesUndef.
+
+  def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>; // nxv8f16: immediate A is emitted as operand value 0.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>; // nxv8f16: immediate B is emitted as operand value 1.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>; // nxv4f16 also selects the _UNDEF_H pseudo.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>; // nxv4f16, immediate B.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>; // nxv2f16 also selects the _UNDEF_H pseudo.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>; // nxv2f16, immediate B.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>; // nxv4f32: A -> 0.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>; // nxv4f32: B -> 1.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>; // nxv2f32 also selects the _UNDEF_S pseudo.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>; // nxv2f32, immediate B.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_UNDEF_D")>; // nxv2f64: A -> 0.
+  def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_UNDEF_D")>; // nxv2f64: B -> 1.
+}
+
+multiclass sve_fp_2op_i_p_zds_zeroing_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> { // Defines _ZERO_{H,S,D} pseudos and SVE_2_Op_Fp_Imm_Pat_Zero patterns selecting op with FP immediate A or B onto them.
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesZero>; // Pseudo on ZPR16, tied to the name NAME # _H, created with FalseLanesZero.
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesZero>; // Pseudo on ZPR32, tied to the name NAME # _S, created with FalseLanesZero.
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesZero>; // Pseudo on ZPR64, tied to the name NAME # _D, created with FalseLanesZero.
+
+  let AddedComplexity = 2 in { // Raises the complexity of the six patterns below; NOTE(review): presumably so they are preferred over the plain op patterns -- confirm.
+    def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_ZERO_H")>; // nxv8f16: immediate A is emitted as operand value 0.
+    def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_ZERO_H")>; // nxv8f16: immediate B is emitted as operand value 1.
+    def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_ZERO_S")>; // nxv4f32: A -> 0.
+    def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_ZERO_S")>; // nxv4f32: B -> 1.
+    def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_ZERO_D")>; // nxv2f64: A -> 0.
+    def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_ZERO_D")>; // nxv2f64: B -> 1.
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Arithmetic - Unpredicated Group
 //===----------------------------------------------------------------------===//
@@ -8371,3 +8429,4 @@ multiclass sve_int_bin_pred_all_active_bhsd<SDPatternOperator op> {
   def : SVE_2_Op_Pred_All_Active_Pt<nxv4i32, op, nxv4i1,  nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
   def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1,  nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
+
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 14b98f3c659081..f45bc81eeba882 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5231,6 +5231,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {2, MVT::v4i64, 4},   // (load 8i64 and) deinterleave into 2 x 4i64
       {2, MVT::v8i64, 8},   // (load 16i64 and) deinterleave into 2 x 8i64
       {2, MVT::v16i64, 16}, // (load 32i64 and) deinterleave into 2 x 16i64
+      {2, MVT::v32i64, 32}, // (load 64i64 and) deinterleave into 2 x 32i64
 
       {3, MVT::v2i8, 3},   // (load 6i8 and) deinterleave into 3 x 2i8
       {3, MVT::v4i8, 3},   // (load 12i8 and) deinterleave into 3 x 4i8
@@ -5248,6 +5249,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {3, MVT::v4i32, 3},   // (load 12i32 and) deinterleave into 3 x 4i32
       {3, MVT::v8i32, 7},   // (load 24i32 and) deinterleave into 3 x 8i32
       {3, MVT::v16i32, 14}, // (load 48i32 and) deinterleave into 3 x 16i32
+      {3, MVT::v32i32, 32}, // (load 96i32 and) deinterleave into 3 x 32i32
 
       {3, MVT::v2i64, 1},   // (load 6i64 and) deinterleave into 3 x 2i64
       {3, MVT::v4i64, 5},   // (load 12i64 and) deinterleave into 3 x 4i64
@@ -5270,10 +5272,12 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {4, MVT::v4i32, 8},   // (load 16i32 and) deinterleave into 4 x 4i32
       {4, MVT::v8i32, 16},  // (load 32i32 and) deinterleave into 4 x 8i32
       {4, MVT::v16i32, 32}, // (load 64i32 and) deinterleave into 4 x 16i32
+      {4, MVT::v32i32, 68}, // (load 128i32 and) deinterleave into 4 x 32i32
 
       {4, MVT::v2i64, 6},  // (load 8i64 and) deinterleave into 4 x 2i64
       {4, MVT::v4i64, 8},  // (load 16i64 and) deinterleave into 4 x 4i64
       {4, MVT::v8i64, 20}, // (load 32i64 and) deinterleave into 4 x 8i64
+      {4, MVT::v16i64, 40}, // (load 64i64 and) deinterleave into 4 x 16i64
 
       {6, MVT::v2i8, 6},   // (load 12i8 and) deinterleave into 6 x 2i8
       {6, MVT::v4i8, 14},  // (load 24i8 and) deinterleave into 6 x 4i8
@@ -5285,6 +5289,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {6, MVT::v4i16, 9},    // (load 24i16 and) deinterleave into 6 x 4i16
       {6, MVT::v8i16, 39},   // (load 48i16 and) deinterleave into 6 x 8i16
       {6, MVT::v16i16, 106}, // (load 96i16 and) deinterleave into 6 x 16i16
+      {6, MVT::v32i16, 212}, // (load 192i16 and) deinterleave into 6 x 32i16
 
       {6, MVT::v2i32, 6},   // (load 12i32 and) deinterleave into 6 x 2i32
       {6, MVT::v4i32, 15},  // (load 24i32 and) deinterleave into 6 x 4i32
@@ -5335,6 +5340,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {2, MVT::v4i64, 4},   // interleave 2 x 4i64 into 8i64 (and store)
       {2, MVT::v8i64, 8},   // interleave 2 x 8i64 into 16i64 (and store)
       {2, MVT::v16i64, 16}, // interleave 2 x 16i64 into 32i64 (and store)
+      {2, MVT::v32i64, 32}, // interleave 2 x 32i64 into 64i64 (and store)
 
       {3, MVT::v2i8, 4},   // interleave 3 x 2i8 into 6i8 (and store)
       {3, MVT::v4i8, 4},   // interleave 3 x 4i8 into 12i8 (and store)
@@ -5352,6 +5358,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {3, MVT::v4i32, 5},   // interleave 3 x 4i32 into 12i32 (and store)
       {3, MVT::v8i32, 11},  // interleave 3 x 8i32 into 24i32 (and store)
       {3, MVT::v16i32, 22}, // interleave 3 x 16i32 into 48i32 (and store)
+      {3, MVT::v32i32, 48}, // interleave 3 x 32i32 into 96i32 (and store)
 
       {3, MVT::v2i64, 4},   // interleave 3 x 2i64 into 6i64 (and store)
       {3, MVT::v4i64, 6},   // interleave 3 x 4i64 into 12i64 (and store)
@@ -5374,10 +5381,12 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {4, MVT::v4i32, 6},   // interleave 4 x 4i32 into 16i32 (and store)
       {4, MVT::v8i32, 16},  // interleave 4 x 8i32 into 32i32 (and store)
       {4, MVT::v16i32, 32}, // interleave 4 x 16i32 into 64i32 (and store)
+      {4, MVT::v32i32, 64}, // interleave 4 x 32i32 into 128i32 (and store)
 
       {4, MVT::v2i64, 6},  // interleave 4 x 2i64 into 8i64 (and store)
       {4, MVT::v4i64, 8},  // interleave 4 x 4i64 into 16i64 (and store)
       {4, MVT::v8i64, 20}, // interleave 4 x 8i64 into 32i64 (and store)
+      {4, MVT::v16i64, 40}, // interleave 4 x 16i64 into 64i64 (and store)
 
       {6, MVT::v2i8, 7},   // interleave 6 x 2i8 into 12i8 (and store)
       {6, MVT::v4i8, 9},   // interleave 6 x 4i8 into 24i8 (and store)
@@ -5389,6 +5398,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
       {6, MVT::v4i16, 15},  // interleave 6 x 4i16 into 24i16 (and store)
       {6, MVT::v8i16, 21},  // interleave 6 x 8i16 into 48i16 (and store)
       {6, MVT::v16i16, 58}, // interleave 6 x 16i16 into 96i16 (and store)
+      {6, MVT::v32i16, 90}, // interleave 6 x 32i16 into 192i16 (and store)
 
       {6, MVT::v2i32, 9},   // interleave 6 x 2i32 into 12i32 (and store)
       {6, MVT::v4i32, 12},  // interleave 6 x 4i32 into 24i32 (and store)
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll
index 80a9215b87cca1..6079f87b8e0dec 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction:   %v0 = load float, float* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load float, float* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load float, float* %in0, align 4
-; AVX2: LV: Found an estimated cost of 228 for VF 32 For instruction:   %v0 = load float, float* %in0, align 4
+; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction:   %v0 = load float, float* %in0, align 4
 
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load float, float* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   %v0 = load float, float* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll
index 4970d17942258c..5fe0776d699a43 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction:   %v0 = load float, float* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction:   %v0 = load float, float* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction:   %v0 = load float, float* %in0, align 4
-; AVX2: LV: Found an estimated cost of 304 for VF 32 For instruction:   %v0 = load float, float* %in0, align 4
+; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction:   %v0 = load float, float* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load float, float* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   %v0 = load float, float* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
index 67629c58d42c9a..160ada094daa86 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction:   %v0 = load double, double* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction:   %v0 = load double, double* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction:   %v0 = load double, double* %in0, align 8
-; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction:   %v0 = load double, double* %in0, align 8
+; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction:   %v0 = load double, double* %in0, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load double, double* %in0, align 8
 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction:   %v0 = load double, double* %in0, align 8
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
index 31b7c415a9a327..5b7fe79b7cfed0 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
@@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction:   %v0 = load double, double* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction:   %v0 = load double, double* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction:   %v0 = load double, double* %in0, align 8
-; AVX2: LV: Found an estimated cost of 128 for VF 16 For instruction:   %v0 = load double, double* %in0, align 8
+; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction:   %v0 = load double, double* %in0, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load double, double* %in0, align 8
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   %v0 = load double, double* %in0, align 8
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll
index 7a9a052e87866a..64dd8b185ecc20 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 11 for VF 4 For instruction:   %v0 = load i16, i16* %in0, align 2
 ; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction:   %v0 = load i16, i16* %in0, align 2
 ; AVX2: LV: Found an estimated cost of 112 for VF 16 For instruction:   %v0 = load i16, i16* %in0, align 2
-; AVX2: LV: Found an estimated cost of 516 for VF 32 For instruction:   %v0 = load i16, i16* %in0, align 2
+; AVX2: LV: Found an estimated cost of 224 for VF 32 For instruction:   %v0 = load i16, i16* %in0, align 2
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i16, i16* %in0, align 2
 ; AVX512: LV: Found an estimated cost of 13 for VF 2 For instruction:   %v0 = load i16, i16* %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll
index caa98c9160b6a3..1462f2d28388c4 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 188 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll
index 2eab36508045ee..f92b236dd164a8 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 100 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll
index d827effecc3835..4cd9372ca072ed 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 276 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll
index 78c209fa106145..d84f9f30da418d 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 280 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
index 2a3cabfdf97adc..b32ff5b10fea8c 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 192 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll
index d1660b54645df2..9965ae82249b17 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 104 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll
index c1e591bed1ce2d..ececeef5330ee5 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction:   %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 368 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 84 for VF 32 For instruction:   %v0 = load i32, i32* %in0, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i32, i32* %in0, align 4
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   %v0 = load i32, i32* %in0, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
index 84e208971455c4..6d24cd248677ba 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction:   %v0 = load i64, i64* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction:   %v0 = load i64, i64* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction:   %v0 = load i64, i64* %in0, align 8
-; AVX2: LV: Found an estimated cost of 208 for VF 32 For instruction:   %v0 = load i64, i64* %in0, align 8
+; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction:   %v0 = load i64, i64* %in0, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i64, i64* %in0, align 8
 ; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction:   %v0 = load i64, i64* %in0, align 8
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
index 9f6e3807d2d558..fc57e813101612 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
@@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction:   %v0 = load i64, i64* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction:   %v0 = load i64, i64* %in0, align 8
 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction:   %v0 = load i64, i64* %in0, align 8
-; AVX2: LV: Found an estimated cost of 208 for VF 16 For instruction:   %v0 = load i64, i64* %in0, align 8
+; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction:   %v0 = load i64, i64* %in0, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i64, i64* %in0, align 8
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   %v0 = load i64, i64* %in0, align 8
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll
index 875639f212593a..9931665f5179bd 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction:   store float %v2, float* %out2, align 4
 ; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction:   store float %v2, float* %out2, align 4
 ; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction:   store float %v2, float* %out2, align 4
-; AVX2: LV: Found an estimated cost of 228 for VF 32 For instruction:   store float %v2, float* %out2, align 4
+; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction:   store float %v2, float* %out2, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store float %v2, float* %out2, align 4
 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   store float %v2, float* %out2, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll
index d77ab5609549d5..09312c05f87150 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction:   store float %v3, float* %out3, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction:   store float %v3, float* %out3, align 4
 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction:   store float %v3, float* %out3, align 4
-; AVX2: LV: Found an estimated cost of 304 for VF 32 For instruction:   store float %v3, float* %out3, align 4
+; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction:   store float %v3, float* %out3, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store float %v3, float* %out3, align 4
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   store float %v3, float* %out3, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll
index 186cd4a5186eca..de6bfc43c3d797 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction:   store double %v1, double* %out1, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction:   store double %v1, double* %out1, align 8
 ; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction:   store double %v1, double* %out1, align 8
-; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction:   store double %v1, double* %out1, align 8
+; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction:   store double %v1, double* %out1, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store double %v1, double* %out1, align 8
 ; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction:   store double %v1, double* %out1, align 8
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll
index 0e9773e0d08c45..fdea21c31b2dce 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll
@@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction:   store double %v3, double* %out3, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction:   store double %v3, double* %out3, align 8
 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction:   store double %v3, double* %out3, align 8
-; AVX2: LV: Found an estimated cost of 128 for VF 16 For instruction:   store double %v3, double* %out3, align 8
+; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction:   store double %v3, double* %out3, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store double %v3, double* %out3, align 8
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   store double %v3, double* %out3, align 8
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll
index b4693d1c3916a6..05ace8cf22333b 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 17 for VF 4 For instruction:   store i16 %v5, i16* %out5, align 2
 ; AVX2: LV: Found an estimated cost of 24 for VF 8 For instruction:   store i16 %v5, i16* %out5, align 2
 ; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction:   store i16 %v5, i16* %out5, align 2
-; AVX2: LV: Found an estimated cost of 516 for VF 32 For instruction:   store i16 %v5, i16* %out5, align 2
+; AVX2: LV: Found an estimated cost of 102 for VF 32 For instruction:   store i16 %v5, i16* %out5, align 2
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i16 %v5, i16* %out5, align 2
 ; AVX512: LV: Found an estimated cost of 13 for VF 2 For instruction:   store i16 %v5, i16* %out5, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll
index 80b6531f9d38df..844cdc627d608b 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction:   store i32 %v2, i32* %out2, align 4
 ; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction:   store i32 %v2, i32* %out2, align 4
 ; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction:   store i32 %v2, i32* %out2, align 4
-; AVX2: LV: Found an estimated cost of 276 for VF 32 For instruction:   store i32 %v2, i32* %out2, align 4
+; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction:   store i32 %v2, i32* %out2, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %v2, i32* %out2, align 4
 ; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction:   store i32 %v2, i32* %out2, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll
index a5e2645bf1e972..1dd952d18082ae 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction:   store i32 %v3, i32* %out3, align 4
 ; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction:   store i32 %v3, i32* %out3, align 4
 ; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction:   store i32 %v3, i32* %out3, align 4
-; AVX2: LV: Found an estimated cost of 368 for VF 32 For instruction:   store i32 %v3, i32* %out3, align 4
+; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction:   store i32 %v3, i32* %out3, align 4
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i32 %v3, i32* %out3, align 4
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   store i32 %v3, i32* %out3, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll
index 06d1c6e0c8629a..0f58fe6c969227 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction:   store i64 %v1, i64* %out1, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction:   store i64 %v1, i64* %out1, align 8
 ; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction:   store i64 %v1, i64* %out1, align 8
-; AVX2: LV: Found an estimated cost of 208 for VF 32 For instruction:   store i64 %v1, i64* %out1, align 8
+; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction:   store i64 %v1, i64* %out1, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i64 %v1, i64* %out1, align 8
 ; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction:   store i64 %v1, i64* %out1, align 8
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll
index c48b1d7aa41f3c..92da4c8515471a 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll
@@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction:   store i64 %v3, i64* %out3, align 8
 ; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction:   store i64 %v3, i64* %out3, align 8
 ; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction:   store i64 %v3, i64* %out3, align 8
-; AVX2: LV: Found an estimated cost of 208 for VF 16 For instruction:   store i64 %v3, i64* %out3, align 8
+; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction:   store i64 %v3, i64* %out3, align 8
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i64 %v3, i64* %out3, align 8
 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction:   store i64 %v3, i64* %out3, align 8
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll b/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll
new file mode 100644
index 00000000000000..8c688e6266924a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll
@@ -0,0 +1,1071 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; FADD
+;
+
+define <vscale x 8 x half> @fadd_h_immhalf(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fadd_h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = fadd <vscale x 8 x half> %a, %splat
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fadd_h_immone(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fadd_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = fadd <vscale x 8 x half> %a, %splat
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x half> @fadd_4h_immhalf(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fadd_4h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = fadd <vscale x 4 x half> %a, %splat
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 4 x half> @fadd_4h_immone(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fadd_4h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = fadd <vscale x 4 x half> %a, %splat
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 2 x half> @fadd_2h_immhalf(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fadd_2h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = fadd <vscale x 2 x half> %a, %splat
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 2 x half> @fadd_2h_immone(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fadd_2h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = fadd <vscale x 2 x half> %a, %splat
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 4 x float> @fadd_s_immhalf(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fadd_s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = fadd <vscale x 4 x float> %a, %splat
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fadd_s_immone(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fadd_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = fadd <vscale x 4 x float> %a, %splat
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x float> @fadd_2s_immhalf(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fadd_2s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = fadd <vscale x 2 x float> %a, %splat
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 2 x float> @fadd_2s_immone(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fadd_2s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = fadd <vscale x 2 x float> %a, %splat
+  ret <vscale x 2 x float> %out
+}
+
+
+define <vscale x 2 x double> @fadd_d_immhalf(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fadd_d_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %out = fadd <vscale x 2 x double> %a, %splat
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fadd_d_immone(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fadd_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %out = fadd <vscale x 2 x double> %a, %splat
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMAX
+;
+
+define <vscale x 8 x half> @fmax_h_immzero(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmax_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmax_h_immone(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmax_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x half> @fmax_4h_immzero(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmax_4h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 4 x half> @fmax_4h_immone(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmax_4h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 2 x half> @fmax_2h_immzero(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmax_2h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 2 x half> @fmax_2h_immone(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmax_2h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immzero(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmax_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immone(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmax_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x float> @fmax_2s_immzero(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmax_2s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %splat)
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 2 x float> @fmax_2s_immone(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmax_2s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %splat)
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immzero(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmax_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immone(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmax_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMAXNM
+;
+
+define <vscale x 8 x half> @fmaxnm_h_immzero(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmaxnm_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immone(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmaxnm_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x half> @fmaxnm_4h_immzero(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmaxnm_4h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 4 x half> @fmaxnm_4h_immone(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmaxnm_4h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 2 x half> @fmaxnm_2h_immzero(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmaxnm_2h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 2 x half> @fmaxnm_2h_immone(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmaxnm_2h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immzero(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmaxnm_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immone(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmaxnm_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x float> @fmaxnm_2s_immzero(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmaxnm_2s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %splat)
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 2 x float> @fmaxnm_2s_immone(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmaxnm_2s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %splat)
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immzero(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmaxnm_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immone(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmaxnm_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMIN
+;
+
+define <vscale x 8 x half> @fmin_h_immzero(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmin_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immone(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmin_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x half> @fmin_4h_immzero(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmin_4h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 4 x half> @fmin_4h_immone(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmin_4h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
+  ret <vscale x 4 x half> %out
+}
+
+define <vscale x 2 x half> @fmin_2h_immzero(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmin_2h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 2 x half> @fmin_2h_immone(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmin_2h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
+  ret <vscale x 2 x half> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immzero(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmin_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immone(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmin_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x float> @fmin_2s_immzero(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmin_2s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %splat)
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 2 x float> @fmin_2s_immone(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmin_2s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %splat)
+  ret <vscale x 2 x float> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immzero(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmin_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0 ; lane 0 = 0.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fmin_d_immone(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmin_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+;
+; FMINNM
+;
+
+define <vscale x 8 x half> @fminnm_h_immzero(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fminnm_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0 ; lane 0 = 0.0, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %imm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x half> @fminnm_h_immone(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fminnm_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %imm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @fminnm_4h_immzero(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fminnm_4h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x half> undef, half 0.000000e+00, i32 0 ; lane 0 = 0.0, broadcast below
+  %imm = shufflevector <vscale x 4 x half> %seed, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %imm)
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 4 x half> @fminnm_4h_immone(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fminnm_4h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 4 x half> %seed, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %imm)
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fminnm_2h_immzero(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fminnm_2h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x half> undef, half 0.000000e+00, i32 0 ; lane 0 = 0.0, broadcast below
+  %imm = shufflevector <vscale x 2 x half> %seed, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %imm)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @fminnm_2h_immone(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fminnm_2h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x half> %seed, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %imm)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fminnm_s_immzero(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fminnm_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0 ; lane 0 = 0.0, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %imm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @fminnm_s_immone(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fminnm_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %imm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @fminnm_2s_immzero(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fminnm_2s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x float> undef, float 0.000000e+00, i32 0 ; lane 0 = 0.0, broadcast below
+  %imm = shufflevector <vscale x 2 x float> %seed, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %imm)
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x float> @fminnm_2s_immone(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fminnm_2s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x float> undef, float 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x float> %seed, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %imm)
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fminnm_d_immzero(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fminnm_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0 ; lane 0 = 0.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fminnm_d_immone(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fminnm_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+;
+; FMUL
+;
+
+define <vscale x 8 x half> @fmul_h_immhalf(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmul_h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = fmul <vscale x 8 x half> %a, %imm
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x half> @fmul_h_immtwo(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fmul_h_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 2.000000e+00, i32 0 ; lane 0 = 2.0, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = fmul <vscale x 8 x half> %a, %imm
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @fmul_4h_immhalf(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmul_4h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 4 x half> %seed, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fmul <vscale x 4 x half> %a, %imm
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 4 x half> @fmul_4h_immtwo(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fmul_4h_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x half> undef, half 2.000000e+00, i32 0 ; lane 0 = 2.0, broadcast below
+  %imm = shufflevector <vscale x 4 x half> %seed, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fmul <vscale x 4 x half> %a, %imm
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fmul_2h_immhalf(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmul_2h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x half> %seed, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fmul <vscale x 2 x half> %a, %imm
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @fmul_2h_immtwo(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fmul_2h_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x half> undef, half 2.000000e+00, i32 0 ; lane 0 = 2.0, broadcast below
+  %imm = shufflevector <vscale x 2 x half> %seed, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fmul <vscale x 2 x half> %a, %imm
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fmul_s_immhalf(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmul_s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fmul <vscale x 4 x float> %a, %imm
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @fmul_s_immtwo(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fmul_s_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 2.000000e+00, i32 0 ; lane 0 = 2.0, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fmul <vscale x 4 x float> %a, %imm
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @fmul_2s_immhalf(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmul_2s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x float> undef, float 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x float> %seed, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fmul <vscale x 2 x float> %a, %imm
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x float> @fmul_2s_immtwo(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fmul_2s_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z0.s
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x float> undef, float 2.000000e+00, i32 0 ; lane 0 = 2.0, broadcast below
+  %imm = shufflevector <vscale x 2 x float> %seed, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fmul <vscale x 2 x float> %a, %imm
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fmul_d_immhalf(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmul_d_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fmul <vscale x 2 x double> %a, %imm
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fmul_d_immtwo(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fmul_d_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.d, z0.d, z0.d
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 2.000000e+00, i32 0 ; lane 0 = 2.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fmul <vscale x 2 x double> %a, %imm
+  ret <vscale x 2 x double> %res
+}
+
+;
+; FSUB
+;
+
+define <vscale x 8 x half> @fsub_h_immhalf(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fsub_h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = fsub <vscale x 8 x half> %a, %imm
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x half> @fsub_h_immone(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fsub_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = fsub <vscale x 8 x half> %a, %imm
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @fsub_4h_immhalf(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fsub_4h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 4 x half> %seed, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fsub <vscale x 4 x half> %a, %imm
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 4 x half> @fsub_4h_immone(<vscale x 4 x half> %a) #0 {
+; CHECK-LABEL: fsub_4h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 4 x half> %seed, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fsub <vscale x 4 x half> %a, %imm
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @fsub_2h_immhalf(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fsub_2h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x half> %seed, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fsub <vscale x 2 x half> %a, %imm
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @fsub_2h_immone(<vscale x 2 x half> %a) #0 {
+; CHECK-LABEL: fsub_2h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x half> %seed, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fsub <vscale x 2 x half> %a, %imm
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @fsub_s_immhalf(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fsub_s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fsub <vscale x 4 x float> %a, %imm
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @fsub_s_immone(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fsub_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = fsub <vscale x 4 x float> %a, %imm
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @fsub_2s_immhalf(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fsub_2s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x float> undef, float 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x float> %seed, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fsub <vscale x 2 x float> %a, %imm
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x float> @fsub_2s_immone(<vscale x 2 x float> %a) #0 {
+; CHECK-LABEL: fsub_2s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x float> undef, float 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x float> %seed, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fsub <vscale x 2 x float> %a, %imm
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @fsub_d_immhalf(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fsub_d_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fsub <vscale x 2 x double> %a, %imm
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fsub_d_immone(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fsub_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = fsub <vscale x 2 x double> %a, %imm
+  ret <vscale x 2 x double> %res
+}
+
+;; Arithmetic intrinsic declarations
+
+declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+attributes #0 = { "target-features"="+sve" }
+attributes #1 = { "target-features"="+sve,+use-experimental-zeroing-pseudos" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-imm.ll
new file mode 100644
index 00000000000000..eea6031fbd6b53
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-imm.ll
@@ -0,0 +1,1309 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x half> @fadd_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fadd_h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(
+              <vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
+              <vscale x 8 x half> %imm)
+  ret <vscale x 8 x half> %res
+}
+
+
+define <vscale x 8 x half> @fadd_h_immhalf_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 {
+; CHECK-LABEL: fadd_h_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_zeroed = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; inactive lanes := 0
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(
+              <vscale x 8 x i1> %pg, <vscale x 8 x half> %a_zeroed,
+              <vscale x 8 x half> %imm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x half> @fadd_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fadd_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(
+              <vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
+              <vscale x 8 x half> %imm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x half> @fadd_h_immone_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 {
+; CHECK-LABEL: fadd_h_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 8 x half> %seed, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_zeroed = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; inactive lanes := 0
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(
+              <vscale x 8 x i1> %pg, <vscale x 8 x half> %a_zeroed,
+              <vscale x 8 x half> %imm)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @fadd_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fadd_s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(
+              <vscale x 4 x i1> %pg, <vscale x 4 x float> %a,
+              <vscale x 4 x float> %imm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @fadd_s_immhalf_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 {
+; CHECK-LABEL: fadd_s_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_zeroed = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; inactive lanes := 0
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(
+              <vscale x 4 x i1> %pg, <vscale x 4 x float> %a_zeroed,
+              <vscale x 4 x float> %imm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @fadd_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fadd_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(
+              <vscale x 4 x i1> %pg, <vscale x 4 x float> %a,
+              <vscale x 4 x float> %imm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @fadd_s_immone_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 {
+; CHECK-LABEL: fadd_s_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_zeroed = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; inactive lanes := 0
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(
+              <vscale x 4 x i1> %pg, <vscale x 4 x float> %a_zeroed,
+              <vscale x 4 x float> %imm)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @fadd_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fadd_d_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(
+              <vscale x 2 x i1> %pg, <vscale x 2 x double> %a,
+              <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fadd_d_immhalf_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 {
+; CHECK-LABEL: fadd_d_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0 ; lane 0 = 0.5, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_zeroed = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; inactive lanes := 0
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(
+              <vscale x 2 x i1> %pg, <vscale x 2 x double> %a_zeroed,
+              <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fadd_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fadd_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(
+              <vscale x 2 x i1> %pg, <vscale x 2 x double> %a,
+              <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @fadd_d_immone_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 {
+; CHECK-LABEL: fadd_d_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %seed = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0 ; lane 0 = 1.0, broadcast below
+  %imm = shufflevector <vscale x 2 x double> %seed, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_zeroed = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; inactive lanes := 0
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(
+              <vscale x 2 x i1> %pg, <vscale x 2 x double> %a_zeroed,
+              <vscale x 2 x double> %imm)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @fmax_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fmax of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmax_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmax_h_immzero_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fmax of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmax_h_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmax_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fmax of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmax_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmax_h_immone_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fmax of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmax_h_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; fmax of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmax_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immzero_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; fmax of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmax_s_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; fmax of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmax_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immone_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; fmax of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmax_s_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; fmax of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmax_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immzero_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; fmax of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmax_d_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; fmax of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmax_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immone_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; fmax of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmax_d_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fmaxnm of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmaxnm_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immzero_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fmaxnm of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmaxnm_h_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a_z,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fmaxnm of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmaxnm_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immone_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fmaxnm of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmaxnm_h_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a_z,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; fmaxnm of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmaxnm_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immzero_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; fmaxnm of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmaxnm_s_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a_z,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; fmaxnm of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmaxnm_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immone_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; fmaxnm of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmaxnm_s_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a_z,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; fmaxnm of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmaxnm_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immzero_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; fmaxnm of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmaxnm_d_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a_z,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; fmaxnm of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmaxnm_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immone_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; fmaxnm of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmaxnm_d_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a_z,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fmin of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmin_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immzero_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fmin of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmin_h_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fmin of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmin_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immone_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fmin of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmin_h_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; fmin of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmin_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immzero_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; fmin of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmin_s_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; fmin of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmin_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immone_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; fmin of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmin_s_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; fmin of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fmin_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immzero_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; fmin of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fmin_d_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; fmin of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fmin_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immone_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; fmin of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fmin_d_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @fminnm_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fminnm of %a with splat(0.0) under %pg; expects the #0.0 immediate form
+; CHECK-LABEL: fminnm_h_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fminnm_h_immzero_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fminnm of %a_z with splat(0.0) under %pg; expects zeroing movprfx + the #0.0 immediate form
+; CHECK-LABEL: fminnm_h_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a_z,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fminnm_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; fminnm of %a with splat(1.0) under %pg; expects the #1.0 immediate form
+; CHECK-LABEL: fminnm_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fminnm_h_immone_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; fminnm of %a_z with splat(1.0) under %pg; expects zeroing movprfx + the #1.0 immediate form
+; CHECK-LABEL: fminnm_h_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask copies lane 0 into every lane
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is true, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg,
+                                                              <vscale x 8 x half> %a_z,
+                                                              <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; #0 (+sve): the 0.0 splat should fold into fminnm's #0.0 immediate form
+; CHECK-LABEL: fminnm_s_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immzero_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fminnm with the #0.0 immediate
+; CHECK-LABEL: fminnm_s_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a_z,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; #0 (+sve): the 1.0 splat should fold into fminnm's #1.0 immediate form
+; CHECK-LABEL: fminnm_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immone_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fminnm with the #1.0 immediate
+; CHECK-LABEL: fminnm_s_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x float> %a_z,
+                                                               <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; #0 (+sve): the 0.0 splat should fold into fminnm's #0.0 immediate form
+; CHECK-LABEL: fminnm_d_immzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immzero_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fminnm with the #0.0 immediate
+; CHECK-LABEL: fminnm_d_immzero_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a_z,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; #0 (+sve): the 1.0 splat should fold into fminnm's #1.0 immediate form
+; CHECK-LABEL: fminnm_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immone_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fminnm with the #1.0 immediate
+; CHECK-LABEL: fminnm_d_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x double> %a_z,
+                                                                <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @fmul_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; #0 (+sve): the 0.5 splat should fold into fmul's #0.5 immediate form
+; CHECK-LABEL: fmul_h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmul_h_immhalf_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fmul with the #0.5 immediate
+; CHECK-LABEL: fmul_h_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmul_h_immtwo(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; #0 (+sve): the 2.0 splat should fold into fmul's #2.0 immediate form
+; CHECK-LABEL: fmul_h_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, #2.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmul_h_immtwo_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fmul with the #2.0 immediate
+; CHECK-LABEL: fmul_h_immtwo_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, #2.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; #0 (+sve): the 0.5 splat should fold into fmul's #0.5 immediate form
+; CHECK-LABEL: fmul_s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immhalf_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fmul with the #0.5 immediate
+; CHECK-LABEL: fmul_s_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immtwo(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; #0 (+sve): the 2.0 splat should fold into fmul's #2.0 immediate form
+; CHECK-LABEL: fmul_s_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, #2.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immtwo_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fmul with the #2.0 immediate
+; CHECK-LABEL: fmul_s_immtwo_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, #2.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; #0 (+sve): the 0.5 splat should fold into fmul's #0.5 immediate form
+; CHECK-LABEL: fmul_d_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immhalf_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fmul with the #0.5 immediate
+; CHECK-LABEL: fmul_d_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immtwo(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; #0 (+sve): the 2.0 splat should fold into fmul's #2.0 immediate form
+; CHECK-LABEL: fmul_d_immtwo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, #2.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immtwo_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fmul with the #2.0 immediate
+; CHECK-LABEL: fmul_d_immtwo_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, #2.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @fsub_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; #0 (+sve): the 0.5 splat should fold into fsub's #0.5 immediate form
+; CHECK-LABEL: fsub_h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsub_h_immhalf_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsub with the #0.5 immediate
+; CHECK-LABEL: fsub_h_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsub_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 { ; #0 (+sve): the 1.0 splat should fold into fsub's #1.0 immediate form
+; CHECK-LABEL: fsub_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsub_h_immone_zero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsub with the #1.0 immediate
+; CHECK-LABEL: fsub_h_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
+                                                            <vscale x 8 x half> %a_z,
+                                                            <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; #0 (+sve): the 0.5 splat should fold into fsub's #0.5 immediate form
+; CHECK-LABEL: fsub_s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immhalf_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsub with the #0.5 immediate
+; CHECK-LABEL: fsub_s_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 { ; #0 (+sve): the 1.0 splat should fold into fsub's #1.0 immediate form
+; CHECK-LABEL: fsub_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immone_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsub with the #1.0 immediate
+; CHECK-LABEL: fsub_s_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
+                                                             <vscale x 4 x float> %a_z,
+                                                             <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; #0 (+sve): the 0.5 splat should fold into fsub's #0.5 immediate form
+; CHECK-LABEL: fsub_d_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immhalf_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsub with the #0.5 immediate
+; CHECK-LABEL: fsub_d_immhalf_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 { ; #0 (+sve): the 1.0 splat should fold into fsub's #1.0 immediate form
+; CHECK-LABEL: fsub_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immone_zero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsub with the #1.0 immediate
+; CHECK-LABEL: fsub_d_immone_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
+                                                              <vscale x 2 x double> %a_z,
+                                                              <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @fsubr_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsubr with the #0.5 immediate
+; CHECK-LABEL: fsubr_h_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fsubr z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg,
+                                                             <vscale x 8 x half> %a_z,
+                                                             <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsubr_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsubr with the #1.0 immediate
+; CHECK-LABEL: fsubr_h_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    fsubr z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg,
+                                                             <vscale x 8 x half> %a_z,
+                                                             <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fsubr_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsubr with the #0.5 immediate
+; CHECK-LABEL: fsubr_s_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fsubr z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
+                                                              <vscale x 4 x float> %a_z,
+                                                              <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsubr_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsubr with the #1.0 immediate
+; CHECK-LABEL: fsubr_s_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fsubr z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
+                                                              <vscale x 4 x float> %a_z,
+                                                              <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsubr_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsubr with the #0.5 immediate
+; CHECK-LABEL: fsubr_d_immhalf:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fsubr z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x double> %a_z,
+                                                               <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsubr_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #1 { ; #1 (zeroing pseudos): expects a zeroing movprfx, then fsubr with the #1.0 immediate
+; CHECK-LABEL: fsubr_d_immone:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    fsubr z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT:    ret
+  %elt   = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0 of %elt
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer ; %a where %pg is set, 0.0 elsewhere
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x double> %a_z,
+                                                               <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+
+;; Arithmetic intrinsic declarations: each takes (predicate, op1, op2) and is declared for the f16, f32 and f64 scalable vector types.
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+attributes #0 = { "target-features"="+sve" } ; plain SVE: used by the tests that expect only the immediate-form instruction
+attributes #1 = { "target-features"="+sve,+use-experimental-zeroing-pseudos" } ; SVE plus the experimental zeroing pseudos: used by the tests that expect a zeroing movprfx
diff --git a/llvm/test/Transforms/InstCombine/truncating-saturate.ll b/llvm/test/Transforms/InstCombine/truncating-saturate.ll
new file mode 100644
index 00000000000000..07899b9490cf11
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/truncating-saturate.ll
@@ -0,0 +1,585 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @use(i32) ; opaque i32 user; gives values an extra use in the oneuse* tests
+declare void @use1(i1) ; opaque i1 user; gives the compare an extra use in the oneuse* tests
+
+define i8 @testi16i8(i16 %add) { ; signed saturating truncate i16 -> i8, shift/xor form
+; CHECK-LABEL: @testi16i8(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i16 %add, 8 ; high byte of %add
+  %conv.i = trunc i16 %sh to i8
+  %conv1.i = trunc i16 %add to i8 ; low byte of %add
+  %shr2.i = ashr i8 %conv1.i, 7 ; 0 or -1: sign of the low byte
+  %cmp.not.i = icmp eq i8 %shr2.i, %conv.i ; true when %add fits in a signed i8
+  %shr4.i = ashr i16 %add, 15 ; 0 or -1: sign of %add
+  %conv5.i = trunc i16 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 127 ; 127 when %add >= 0, -128 otherwise
+  %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i ; low byte if it fits, else the saturated value
+  ret i8 %cond.i
+}
+
+define i32 @testi64i32(i64 %add) { ; same shift/xor saturating truncate, i64 -> i32
+; CHECK-LABEL: @testi64i32(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    ret i32 [[COND_I]]
+;
+  %sh = lshr i64 %add, 32 ; high 32 bits of %add
+  %conv.i = trunc i64 %sh to i32
+  %conv1.i = trunc i64 %add to i32 ; low 32 bits of %add
+  %shr2.i = ashr i32 %conv1.i, 31 ; 0 or -1: sign of the low half
+  %cmp.not.i = icmp eq i32 %shr2.i, %conv.i ; true when %add fits in a signed i32
+  %shr4.i = ashr i64 %add, 63 ; 0 or -1: sign of %add
+  %conv5.i = trunc i64 %shr4.i to i32
+  %xor.i = xor i32 %conv5.i, 2147483647 ; 2147483647 when %add >= 0, -2147483648 otherwise
+  %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i
+  ret i32 %cond.i
+}
+
+define i16 @testi32i16i8(i32 %add) { ; clamp an i32 to the i8 range, result held in i16 (range-check form)
+; CHECK-LABEL: @testi32i16i8(
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[ADD:%.*]], 128
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A]], 256
+; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[ADD]] to i16
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[ADD]], -1
+; CHECK-NEXT:    [[F:%.*]] = select i1 [[C]], i16 127, i16 -128
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i16 [[T]], i16 [[F]]
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %a = add i32 %add, 128
+  %cmp = icmp ult i32 %a, 256 ; true when -128 <= %add <= 127
+  %t = trunc i32 %add to i16
+  %c = icmp sgt i32 %add, -1 ; %add is non-negative
+  %f = select i1 %c, i16 127, i16 -128 ; saturated value for out-of-range inputs
+  %r = select i1 %cmp, i16 %t, i16 %f
+  ret i16 %r
+}
+
+define <4 x i16> @testv4i32i16i8(<4 x i32> %add) { ; <4 x i32> splat-constant version of @testi32i16i8
+; CHECK-LABEL: @testv4i32i16i8(
+; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[ADD:%.*]], <i32 128, i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <4 x i32> [[A]], <i32 256, i32 256, i32 256, i32 256>
+; CHECK-NEXT:    [[T:%.*]] = trunc <4 x i32> [[ADD]] to <4 x i16>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <4 x i32> [[ADD]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[F:%.*]] = select <4 x i1> [[C]], <4 x i16> <i16 127, i16 127, i16 127, i16 127>, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[CMP]], <4 x i16> [[T]], <4 x i16> [[F]]
+; CHECK-NEXT:    ret <4 x i16> [[R]]
+;
+  %a = add <4 x i32> %add, <i32 128, i32 128, i32 128, i32 128>
+  %cmp = icmp ult <4 x i32> %a, <i32 256, i32 256, i32 256, i32 256> ; per lane: -128 <= %add <= 127
+  %t = trunc <4 x i32> %add to <4 x i16>
+  %c = icmp sgt <4 x i32> %add, <i32 -1, i32 -1, i32 -1, i32 -1> ; per lane: non-negative
+  %f = select <4 x i1> %c, <4 x i16> <i16 127, i16 127, i16 127, i16 127>, <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>
+  %r = select <4 x i1> %cmp, <4 x i16> %t, <4 x i16> %f
+  ret <4 x i16> %r
+}
+
+define i32 @testi32i32i8(i32 %add) { ; range check plus 127/-128 select with no trunc: result stays i32
+; CHECK-LABEL: @testi32i32i8(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = add i32 %add, 128
+  %cmp = icmp ult i32 %a, 256 ; true when -128 <= %add <= 127
+  %c = icmp sgt i32 %add, -1
+  %f = select i1 %c, i32 127, i32 -128
+  %r = select i1 %cmp, i32 %add, i32 %f ; checks expect a clamp of %add to [-128, 127] via two icmp+select pairs
+  ret i32 %r
+}
+
+define i16 @test_truncfirst(i32 %add) { ; trunc to i16 first, then the range-check pattern entirely in i16
+; CHECK-LABEL: @test_truncfirst(
+; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[ADD:%.*]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[T]], -128
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[T]], i16 -128
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %t = trunc i32 %add to i16
+  %a = add i16 %t, 128
+  %cmp = icmp ult i16 %a, 256 ; true when -128 <= %t <= 127
+  %c = icmp sgt i16 %t, -1
+  %f = select i1 %c, i16 127, i16 -128
+  %r = select i1 %cmp, i16 %t, i16 %f ; checks expect a clamp of %t to [-128, 127]
+  ret i16 %r
+}
+
+define i16 @testtrunclowhigh(i32 %add, i16 %low, i16 %high) { ; @testi32i16i8 with the saturation values passed in as arguments
+; CHECK-LABEL: @testtrunclowhigh(
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[ADD:%.*]], 128
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A]], 256
+; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[ADD]] to i16
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[ADD]], -1
+; CHECK-NEXT:    [[F:%.*]] = select i1 [[C]], i16 [[HIGH:%.*]], i16 [[LOW:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i16 [[T]], i16 [[F]]
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %a = add i32 %add, 128
+  %cmp = icmp ult i32 %a, 256 ; true when -128 <= %add <= 127
+  %t = trunc i32 %add to i16
+  %c = icmp sgt i32 %add, -1
+  %f = select i1 %c, i16 %high, i16 %low ; arbitrary replacements instead of 127 / -128
+  %r = select i1 %cmp, i16 %t, i16 %f
+  ret i16 %r
+}
+
+define i32 @testi64i32addsat(i32 %a, i32 %b) { ; i64 -> i32 shift/xor pattern fed by a widened i32 + i32 add
+; CHECK-LABEL: @testi64i32addsat(
+; CHECK-NEXT:    [[SA:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[SB:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[SA]], [[SB]]
+; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD]], 32
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    ret i32 [[COND_I]]
+;
+  %sa = sext i32 %a to i64
+  %sb = sext i32 %b to i64
+  %add = add i64 %sa, %sb ; checks expect nsw to be added to this add
+  %sh = lshr i64 %add, 32 ; high 32 bits of the sum
+  %conv.i = trunc i64 %sh to i32
+  %conv1.i = trunc i64 %add to i32 ; low 32 bits of the sum
+  %shr2.i = ashr i32 %conv1.i, 31
+  %cmp.not.i = icmp eq i32 %shr2.i, %conv.i ; true when the sum fits in a signed i32
+  %shr4.i = ashr i64 %add, 63
+  %conv5.i = trunc i64 %shr4.i to i32
+  %xor.i = xor i32 %conv5.i, 2147483647
+  %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i
+  ret i32 %cond.i
+}
+
+define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; <4 x i16> -> <4 x i8> splat-constant version of @testi16i8
+; CHECK-LABEL: @testv4i16i8(
+; CHECK-NEXT:    [[SH:%.*]] = lshr <4 x i16> [[ADD:%.*]], <i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8>
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8>
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], <i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], <i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8>
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], <i8 127, i8 127, i8 127, i8 127>
+; CHECK-NEXT:    [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]]
+; CHECK-NEXT:    ret <4 x i8> [[COND_I]]
+;
+  %sh = lshr <4 x i16> %add, <i16 8, i16 8, i16 8, i16 8> ; high byte of each lane
+  %conv.i = trunc <4 x i16> %sh to <4 x i8>
+  %conv1.i = trunc <4 x i16> %add to <4 x i8> ; low byte of each lane
+  %shr2.i = ashr <4 x i8> %conv1.i, <i8 7, i8 7, i8 7, i8 7>
+  %cmp.not.i = icmp eq <4 x i8> %shr2.i, %conv.i ; per lane: value fits in a signed i8
+  %shr4.i = ashr <4 x i16> %add, <i16 15, i16 15, i16 15, i16 15>
+  %conv5.i = trunc <4 x i16> %shr4.i to <4 x i8>
+  %xor.i = xor <4 x i8> %conv5.i, <i8 127, i8 127, i8 127, i8 127> ; per lane: 127 or -128 by sign
+  %cond.i = select <4 x i1> %cmp.not.i, <4 x i8> %conv1.i, <4 x i8> %xor.i
+  ret <4 x i8> %cond.i
+}
+
+define <4 x i8> @testv4i16i8add(<4 x i8> %a, <4 x i8> %b) { ; vector shift/xor pattern fed by a widened <4 x i8> add
+; CHECK-LABEL: @testv4i16i8add(
+; CHECK-NEXT:    [[SA:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i16>
+; CHECK-NEXT:    [[SB:%.*]] = sext <4 x i8> [[B:%.*]] to <4 x i16>
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <4 x i16> [[SA]], [[SB]]
+; CHECK-NEXT:    [[SH:%.*]] = lshr <4 x i16> [[ADD]], <i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8>
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8>
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], <i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], <i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8>
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], <i8 127, i8 127, i8 127, i8 127>
+; CHECK-NEXT:    [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]]
+; CHECK-NEXT:    ret <4 x i8> [[COND_I]]
+;
+  %sa = sext <4 x i8> %a to <4 x i16>
+  %sb = sext <4 x i8> %b to <4 x i16>
+  %add = add <4 x i16> %sa, %sb ; checks expect nsw to be added to this add
+  %sh = lshr <4 x i16> %add, <i16 8, i16 8, i16 8, i16 8> ; high byte of each lane's sum
+  %conv.i = trunc <4 x i16> %sh to <4 x i8>
+  %conv1.i = trunc <4 x i16> %add to <4 x i8> ; low byte of each lane's sum
+  %shr2.i = ashr <4 x i8> %conv1.i, <i8 7, i8 7, i8 7, i8 7>
+  %cmp.not.i = icmp eq <4 x i8> %shr2.i, %conv.i ; per lane: sum fits in a signed i8
+  %shr4.i = ashr <4 x i16> %add, <i16 15, i16 15, i16 15, i16 15>
+  %conv5.i = trunc <4 x i16> %shr4.i to <4 x i8>
+  %xor.i = xor <4 x i8> %conv5.i, <i8 127, i8 127, i8 127, i8 127>
+  %cond.i = select <4 x i1> %cmp.not.i, <4 x i8> %conv1.i, <4 x i8> %xor.i
+  ret <4 x i8> %cond.i
+}
+
+define i8 @testi16i8_revcmp(i16 %add) { ; @testi16i8 with the icmp operands swapped
+; CHECK-LABEL: @testi16i8_revcmp(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i16 %add, 8
+  %conv.i = trunc i16 %sh to i8
+  %conv1.i = trunc i16 %add to i8
+  %shr2.i = ashr i8 %conv1.i, 7
+  %cmp.not.i = icmp eq i8 %conv.i, %shr2.i ; operands reversed; checks expect %shr2.i as the first operand
+  %shr4.i = ashr i16 %add, 15
+  %conv5.i = trunc i16 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 127
+  %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i
+  ret i8 %cond.i
+}
+
+define i8 @testi16i8_revselect(i16 %add) { ; @testi16i8 written with icmp ne and the select arms swapped
+; CHECK-LABEL: @testi16i8_revselect(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
+; CHECK-NEXT:    [[CMP_NOT_I_NOT:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I_NOT]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i16 %add, 8
+  %conv.i = trunc i16 %sh to i8
+  %conv1.i = trunc i16 %add to i8
+  %shr2.i = ashr i8 %conv1.i, 7
+  %cmp.not.i = icmp ne i8 %conv.i, %shr2.i ; ne instead of eq: true when %add does NOT fit in i8
+  %shr4.i = ashr i16 %add, 15
+  %conv5.i = trunc i16 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 127
+  %cond.i = select i1 %cmp.not.i, i8 %xor.i, i8 %conv1.i ; arms swapped to match; checks expect the eq form with %conv1.i first
+  ret i8 %cond.i
+}
+
+define i8 @testi32i8(i32 %add) { ; i32 source, but with the i16 -> i8 shift amounts (8 and 15)
+; CHECK-LABEL: @testi32i8(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[ADD:%.*]], 8
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i32 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i32 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ADD]], 15
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i32 [[TMP1]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i32 %add, 8
+  %conv.i = trunc i32 %sh to i8 ; bits 8..15 of %add only; the upper 16 bits are not examined
+  %conv1.i = trunc i32 %add to i8
+  %shr2.i = ashr i8 %conv1.i, 7
+  %cmp.not.i = icmp eq i8 %shr2.i, %conv.i
+  %shr4.i = ashr i32 %add, 15 ; not the i32 sign bit (that would be 31); checks expect lshr here
+  %conv5.i = trunc i32 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 127
+  %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i
+  ret i8 %cond.i
+}
+
+define i16 @differentconsts(i32 %x, i16 %replacement_low, i16 %replacement_high) { ; asymmetric range with unrelated replacement constants; the two i16 arguments are unused
+; CHECK-LABEL: @differentconsts(
+; CHECK-NEXT:    [[T0:%.*]] = icmp slt i32 [[X:%.*]], 128
+; CHECK-NEXT:    [[T1:%.*]] = select i1 [[T0]], i16 256, i16 -1
+; CHECK-NEXT:    [[T2:%.*]] = add i32 [[X]], 16
+; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 144
+; CHECK-NEXT:    [[T4:%.*]] = trunc i32 [[X]] to i16
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[T3]], i16 [[T4]], i16 [[T1]]
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %t0 = icmp slt i32 %x, 128
+  %t1 = select i1 %t0, i16 256, i16 65535 ; 65535 is the i16 bit pattern of -1, which is how the checks print it
+  %t2 = add i32 %x, 16
+  %t3 = icmp ult i32 %t2, 144 ; true when -16 <= %x < 128
+  %t4 = trunc i32 %x to i16
+  %r = select i1 %t3, i16 %t4, i16 %t1
+  ret i16 %r
+}
+
+define i8 @badimm1(i16 %add) { ; @testi16i8 with a mismatched immediate: lshr by 9 instead of 8
+; CHECK-LABEL: @badimm1(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 9
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i16 %add, 9 ; off by one: no longer extracts exactly the high byte
+  %conv.i = trunc i16 %sh to i8
+  %conv1.i = trunc i16 %add to i8
+  %shr2.i = ashr i8 %conv1.i, 7
+  %cmp.not.i = icmp eq i8 %shr2.i, %conv.i
+  %shr4.i = ashr i16 %add, 15
+  %conv5.i = trunc i16 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 127
+  %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i
+  ret i8 %cond.i
+}
+
+define i8 @badimm2(i16 %add) { ; @testi16i8 with a mismatched immediate: ashr i8 by 6 instead of 7
+; CHECK-LABEL: @badimm2(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 6
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i16 %add, 8
+  %conv.i = trunc i16 %sh to i8
+  %conv1.i = trunc i16 %add to i8
+  %shr2.i = ashr i8 %conv1.i, 6 ; off by one: not a pure sign splat of the low byte
+  %cmp.not.i = icmp eq i8 %shr2.i, %conv.i
+  %shr4.i = ashr i16 %add, 15
+  %conv5.i = trunc i16 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 127
+  %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i
+  ret i8 %cond.i
+}
+
+define i8 @badimm3(i16 %add) { ; @testi16i8 with a mismatched immediate: ashr i16 by 14 instead of 15
+; CHECK-LABEL: @badimm3(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 14
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i16 %add, 8
+  %conv.i = trunc i16 %sh to i8
+  %conv1.i = trunc i16 %add to i8
+  %shr2.i = ashr i8 %conv1.i, 7
+  %cmp.not.i = icmp eq i8 %shr2.i, %conv.i
+  %shr4.i = ashr i16 %add, 14 ; off by one: not a pure sign splat of %add
+  %conv5.i = trunc i16 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 127
+  %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i
+  ret i8 %cond.i
+}
+
+define i8 @badimm4(i16 %add) { ; @testi16i8 with a mismatched immediate: xor with 126 instead of 127
+; CHECK-LABEL: @badimm4(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 126
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
+; CHECK-NEXT:    ret i8 [[COND_I]]
+;
+  %sh = lshr i16 %add, 8
+  %conv.i = trunc i16 %sh to i8
+  %conv1.i = trunc i16 %add to i8
+  %shr2.i = ashr i8 %conv1.i, 7
+  %cmp.not.i = icmp eq i8 %shr2.i, %conv.i
+  %shr4.i = ashr i16 %add, 15
+  %conv5.i = trunc i16 %shr4.i to i8
+  %xor.i = xor i8 %conv5.i, 126 ; yields 126 / -127 rather than the i8 limits 127 / -128
+  %cond.i = select i1 %cmp.not.i, i8 %conv1.i, i8 %xor.i
+  ret i8 %cond.i
+}
+
+; One-use checks: the i64 -> i32 pattern with extra users of its intermediate values.
+
+define i32 @oneusexor(i64 %add) { ; @testi64i32 where %xor.i has a user besides the select
+; CHECK-LABEL: @oneusexor(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    call void @use(i32 [[XOR_I]])
+; CHECK-NEXT:    ret i32 [[COND_I]]
+;
+  %sh = lshr i64 %add, 32
+  %conv.i = trunc i64 %sh to i32
+  %conv1.i = trunc i64 %add to i32
+  %shr2.i = ashr i32 %conv1.i, 31
+  %cmp.not.i = icmp eq i32 %shr2.i, %conv.i
+  %shr4.i = ashr i64 %add, 63
+  %conv5.i = trunc i64 %shr4.i to i32
+  %xor.i = xor i32 %conv5.i, 2147483647
+  %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i
+  call void @use(i32 %xor.i) ; extra use of the saturated value
+  ret i32 %cond.i
+}
+
+define i32 @oneuseconv(i64 %add) { ; @testi64i32 where %conv1.i has a user outside the pattern
+; CHECK-LABEL: @oneuseconv(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    call void @use(i32 [[CONV1_I]])
+; CHECK-NEXT:    ret i32 [[COND_I]]
+;
+  %sh = lshr i64 %add, 32
+  %conv.i = trunc i64 %sh to i32
+  %conv1.i = trunc i64 %add to i32
+  %shr2.i = ashr i32 %conv1.i, 31
+  %cmp.not.i = icmp eq i32 %shr2.i, %conv.i
+  %shr4.i = ashr i64 %add, 63
+  %conv5.i = trunc i64 %shr4.i to i32
+  %xor.i = xor i32 %conv5.i, 2147483647
+  %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i
+  call void @use(i32 %conv1.i) ; extra use of the truncated low half
+  ret i32 %cond.i
+}
+
+define i32 @oneusecmp(i64 %add) { ; @testi64i32 where %cmp.not.i has a user besides the select
+; CHECK-LABEL: @oneusecmp(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    call void @use1(i1 [[CMP_NOT_I]])
+; CHECK-NEXT:    ret i32 [[COND_I]]
+;
+  %sh = lshr i64 %add, 32
+  %conv.i = trunc i64 %sh to i32
+  %conv1.i = trunc i64 %add to i32
+  %shr2.i = ashr i32 %conv1.i, 31
+  %cmp.not.i = icmp eq i32 %shr2.i, %conv.i
+  %shr4.i = ashr i64 %add, 63
+  %conv5.i = trunc i64 %shr4.i to i32
+  %xor.i = xor i32 %conv5.i, 2147483647
+  %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i
+  call void @use1(i1 %cmp.not.i) ; extra use of the fits-in-i32 compare
+  ret i32 %cond.i
+}
+
+define i32 @oneuseboth(i64 %add) { ; @testi64i32 where both %xor.i and %conv1.i have extra users
+; CHECK-LABEL: @oneuseboth(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    call void @use(i32 [[XOR_I]])
+; CHECK-NEXT:    call void @use(i32 [[CONV1_I]])
+; CHECK-NEXT:    ret i32 [[COND_I]]
+;
+  %sh = lshr i64 %add, 32
+  %conv.i = trunc i64 %sh to i32
+  %conv1.i = trunc i64 %add to i32
+  %shr2.i = ashr i32 %conv1.i, 31
+  %cmp.not.i = icmp eq i32 %shr2.i, %conv.i
+  %shr4.i = ashr i64 %add, 63
+  %conv5.i = trunc i64 %shr4.i to i32
+  %xor.i = xor i32 %conv5.i, 2147483647
+  %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i
+  call void @use(i32 %xor.i) ; extra use of the saturated value
+  call void @use(i32 %conv1.i) ; extra use of the truncated low half
+  ret i32 %cond.i
+}
+
+define i32 @oneusethree(i64 %add) { ; @testi64i32 where %xor.i, %conv1.i and %cmp.not.i all have extra users
+; CHECK-LABEL: @oneusethree(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
+; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
+; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
+; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
+; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    call void @use(i32 [[XOR_I]])
+; CHECK-NEXT:    call void @use(i32 [[CONV1_I]])
+; CHECK-NEXT:    call void @use1(i1 [[CMP_NOT_I]])
+; CHECK-NEXT:    ret i32 [[COND_I]]
+;
+  %sh = lshr i64 %add, 32
+  %conv.i = trunc i64 %sh to i32
+  %conv1.i = trunc i64 %add to i32
+  %shr2.i = ashr i32 %conv1.i, 31
+  %cmp.not.i = icmp eq i32 %shr2.i, %conv.i
+  %shr4.i = ashr i64 %add, 63
+  %conv5.i = trunc i64 %shr4.i to i32
+  %xor.i = xor i32 %conv5.i, 2147483647
+  %cond.i = select i1 %cmp.not.i, i32 %conv1.i, i32 %xor.i
+  call void @use(i32 %xor.i) ; extra use of the saturated value
+  call void @use(i32 %conv1.i) ; extra use of the truncated low half
+  call void @use1(i1 %cmp.not.i) ; extra use of the fits-in-i32 compare
+  ret i32 %cond.i
+}
+
+define i8 @C0zero(i8 %X, i8 %y, i8 %z) { ; range-check shape whose upper bound constant is 0
+; CHECK-LABEL: @C0zero(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 [[X:%.*]], -10
+; CHECK-NEXT:    [[F:%.*]] = select i1 [[C]], i8 [[Y:%.*]], i8 [[Z:%.*]]
+; CHECK-NEXT:    ret i8 [[F]]
+;
+  %a = add i8 %X, 10
+  %cmp = icmp ult i8 %a, 0 ; never true: no value is unsigned-less-than 0
+  %c = icmp slt i8 %X, -10
+  %f = select i1 %c, i8 %y, i8 %z
+  %r = select i1 %cmp, i8 %X, i8 %f ; so this always yields %f; checks expect %f returned directly
+  ret i8 %r
+}
diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp
index bc78869f9c54d9..21533652b11c21 100644
--- a/llvm/unittests/IR/ConstantRangeTest.cpp
+++ b/llvm/unittests/IR/ConstantRangeTest.cpp
@@ -1081,6 +1081,20 @@ TEST_F(ConstantRangeTest, Multiply) {
             ConstantRange(APInt(8, -2), APInt(8, 1)));
 }
 
+TEST_F(ConstantRangeTest, smul_fast) { // smul_fast via the exhaustive helper
+  TestBinaryOpExhaustive(
+      [](const ConstantRange &CR1, const ConstantRange &CR2) {
+        return CR1.smul_fast(CR2); // range operation under test
+      },
+      [](const APInt &N1, const APInt &N2) {
+        return N1 * N2; // NOTE(review): presumably the per-value reference
+      },
+      PreferSmallest,
+      [](const ConstantRange &, const ConstantRange &) {
+        return false; // Check correctness only.
+      });
+}
+
 TEST_F(ConstantRangeTest, UMax) {
   EXPECT_EQ(Full.umax(Full), Full);
   EXPECT_EQ(Full.umax(Empty), Empty);
diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
index 795c81dc3f6f66..15a6b92d7c7139 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
@@ -110,6 +110,7 @@ static_library("Support") {
     "PrettyStackTrace.cpp",
     "RISCVAttributeParser.cpp",
     "RISCVAttributes.cpp",
+    "RISCVISAInfo.cpp",
     "RWMutex.cpp",
     "RandomNumberGenerator.cpp",
     "Regex.cpp",