diff --git a/bolt/test/runtime/AArch64/inline-memcpy.s b/bolt/test/runtime/AArch64/inline-memcpy.s
index badff299603a0..75066c855b9ed 100644
--- a/bolt/test/runtime/AArch64/inline-memcpy.s
+++ b/bolt/test/runtime/AArch64/inline-memcpy.s
@@ -81,14 +81,14 @@
 # CHECK-ASM: bl{{.*}}<memcpy
 
 # Register move should NOT be inlined (size unknown at compile time)
-# CHECK-ASM-LABEL: <test_register_move_negative>:
+# CHECK-ASM-LABEL: <test_register_move_unknown>:
 # CHECK-ASM: bl{{.*}}<memcpy
 
-# CHECK-ASM-LABEL: <test_x2_rewrite_negative>:
+# CHECK-ASM-LABEL: <test_x2_rewrite_unknown>:
 # CHECK-ASM: bl{{.*}}<memcpy
 
 # Live-in parameter should NOT be inlined (size unknown at compile time)
-# CHECK-ASM-LABEL: <test_live_in_negative>:
+# CHECK-ASM-LABEL: <test_live_in_unknown>:
 # CHECK-ASM: bl{{.*}}<memcpy
 
 # _memcpy8 should be inlined with end-pointer return (dest+size)
@@ -262,9 +262,9 @@ test_4_byte_add_immediate:
 	ret
 	.size	test_4_byte_add_immediate, .-test_4_byte_add_immediate
 
-	.globl	test_register_move_negative
-	.type	test_register_move_negative,@function
-test_register_move_negative:
+	.globl	test_register_move_unknown
+	.type	test_register_move_unknown,@function
+test_register_move_unknown:
 	stp	x29, x30, [sp, #-32]!
 	mov	x29, sp
 	add	x1, sp, #16
@@ -274,20 +274,20 @@ test_register_move_negative:
 	bl	memcpy
 	ldp	x29, x30, [sp], #32
 	ret
-	.size	test_register_move_negative, .-test_register_move_negative
+	.size	test_register_move_unknown, .-test_register_move_unknown
 
-	.globl  test_x2_rewrite_negative
-	.type   test_x2_rewrite_negative,@function
-test_x2_rewrite_negative:
+	.globl  test_x2_rewrite_unknown
+	.type   test_x2_rewrite_unknown,@function
+test_x2_rewrite_unknown:
 	mov     x2, #8
 	ldr     x2, [sp, #24]
 	bl      memcpy
 	ret
-	.size   test_x2_rewrite_negative, .-test_x2_rewrite_negative
+	.size   test_x2_rewrite_unknown, .-test_x2_rewrite_unknown
 
-	.globl	test_live_in_negative
-	.type	test_live_in_negative,@function
-test_live_in_negative:
+	.globl	test_live_in_unknown
+	.type	test_live_in_unknown,@function
+test_live_in_unknown:
 	# x2 comes in as parameter, no instruction sets it (should NOT inline)
 	stp	x29, x30, [sp, #-32]!
 	mov	x29, sp
@@ -297,7 +297,7 @@ test_live_in_negative:
 	bl	memcpy
 	ldp	x29, x30, [sp], #32
 	ret
-	.size	test_live_in_negative, .-test_live_in_negative
+	.size	test_live_in_unknown, .-test_live_in_unknown
 
 	.globl	test_memcpy8_4_byte
 	.type	test_memcpy8_4_byte,@function
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst
index cc0c729aaacdc..b359d85ad0cdc 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst
@@ -4,7 +4,7 @@ cert-mem57-cpp
 ==============
 
 The `cert-mem57-cpp` is an aliaes, please see
-`bugprone-default-operator-new-on-overaligned-type <../bugprone/default-operator-new-on-overaligned-type>`_
+`bugprone-default-operator-new-on-overaligned-type <../bugprone/default-operator-new-on-overaligned-type.html>`_
 for more information.
 
 This check corresponds to the CERT C++ Coding Standard rule
diff --git a/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst b/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst
index ec9ef1c60913c..6c994a48d83de 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst
@@ -14,3 +14,21 @@ should be generally avoided.
   // becomes
 
   static std::string Moo = (Twine("bark") + "bah").str();
+
+The ``Twine`` does not own the memory of its contents, so it is not
+recommended to use ``Twine`` created from temporary strings or string literals.
+
+.. code-block:: c++
+
+  static Twine getModuleIdentifier(StringRef moduleName) {
+    return moduleName + "_module";
+  }
+  void foo() {
+    Twine result = getModuleIdentifier(std::string{"abc"} + "def");
+    // temporary std::string is destroyed here, result is dangling
+  }
+
+After applying this fix-it hints, the code will use ``std::string`` instead of
+``Twine`` for local variables. However, ``Twine`` has lots of methods that
+are incompatible with ``std::string``, so the user may need to adjust the code
+manually after applying the fix-it hints.
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 48ef8be9fb782..6f099a7027a10 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -180,6 +180,8 @@ struct MissingFeatures {
   static bool atomicSyncScopeID() { return false; }
   static bool atomicTypes() { return false; }
   static bool atomicUseLibCall() { return false; }
+  static bool atomicMicrosoftVolatile() { return false; }
+  static bool atomicOpenMP() { return false; }
 
   // Global ctor handling
   static bool globalCtorLexOrder() { return false; }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index c67ed99b1f49e..eb8d7d1112016 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11668,7 +11668,7 @@ class Sema final : public SemaBase {
       ASTTemplateArgsPtr TemplateArgsIn, SourceLocation RAngleLoc);
 
   DeclResult ActOnVarTemplateSpecialization(
-      Scope *S, Declarator &D, TypeSourceInfo *DI, LookupResult &Previous,
+      Scope *S, Declarator &D, TypeSourceInfo *TSI, LookupResult &Previous,
       SourceLocation TemplateKWLoc, TemplateParameterList *TemplateParams,
       StorageClass SC, bool IsPartialSpecialization);
 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 2669f62456711..fab907b9c1a40 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3108,9 +3108,9 @@ TypeSourceInfo *ASTContext::CreateTypeSourceInfo(QualType T,
 
 TypeSourceInfo *ASTContext::getTrivialTypeSourceInfo(QualType T,
                                                      SourceLocation L) const {
-  TypeSourceInfo *DI = CreateTypeSourceInfo(T);
-  DI->getTypeLoc().initialize(const_cast<ASTContext &>(*this), L);
-  return DI;
+  TypeSourceInfo *TSI = CreateTypeSourceInfo(T);
+  TSI->getTypeLoc().initialize(const_cast<ASTContext &>(*this), L);
+  return TSI;
 }
 
 const ASTRecordLayout &
@@ -5891,11 +5891,11 @@ TypeSourceInfo *ASTContext::getTemplateSpecializationTypeInfo(
   QualType TST = getTemplateSpecializationType(
       Keyword, Name, SpecifiedArgs.arguments(), CanonicalArgs, Underlying);
 
-  TypeSourceInfo *DI = CreateTypeSourceInfo(TST);
-  DI->getTypeLoc().castAs<TemplateSpecializationTypeLoc>().set(
+  TypeSourceInfo *TSI = CreateTypeSourceInfo(TST);
+  TSI->getTypeLoc().castAs<TemplateSpecializationTypeLoc>().set(
       ElaboratedKeywordLoc, QualifierLoc, TemplateKeywordLoc, NameLoc,
       SpecifiedArgs);
-  return DI;
+  return TSI;
 }
 
 QualType ASTContext::getTemplateSpecializationType(
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 6c088469a3ca2..4e634000adc3b 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -208,6 +208,19 @@ template <class Emitter> class LocOverrideScope final {
 } // namespace interp
 } // namespace clang
 
+template <class Emitter>
+bool Compiler<Emitter>::isValidBitCast(const CastExpr *E) {
+  QualType FromTy = E->getSubExpr()->getType()->getPointeeType();
+  QualType ToTy = E->getType()->getPointeeType();
+
+  if (classify(FromTy) == classify(ToTy))
+    return true;
+
+  if (FromTy->isVoidType() || ToTy->isVoidType())
+    return true;
+  return false;
+}
+
 template <class Emitter>
 bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
   const Expr *SubExpr = CE->getSubExpr();
@@ -476,8 +489,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     return this->delegate(SubExpr);
 
   case CK_BitCast: {
+    QualType CETy = CE->getType();
     // Reject bitcasts to atomic types.
-    if (CE->getType()->isAtomicType()) {
+    if (CETy->isAtomicType()) {
       if (!this->discard(SubExpr))
         return false;
       return this->emitInvalidCast(CastKind::Reinterpret, /*Fatal=*/true, CE);
@@ -492,6 +506,10 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     if (!FromT || !ToT)
       return false;
 
+    if (!this->isValidBitCast(CE) &&
+        !this->emitInvalidCast(CastKind::ReinterpretLike, /*Fatal=*/false, CE))
+      return false;
+
     assert(isPtrType(*FromT));
     assert(isPtrType(*ToT));
     if (FromT == ToT) {
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 5c46f75af4da3..fac0a7f4e1886 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -425,6 +425,8 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
 
   bool refersToUnion(const Expr *E);
 
+  bool isValidBitCast(const CastExpr *E);
+
 protected:
   /// Variable to storage mapping.
   llvm::DenseMap<const ValueDecl *, Scope::Local> Locals;
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 5ab9c8ee75a51..6877b03f5916b 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -1915,6 +1915,9 @@ bool Load(InterpState &S, CodePtr OpPC) {
     return false;
   if (!Ptr.isBlockPointer())
     return false;
+  if (const Descriptor *D = Ptr.getFieldDesc();
+      !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name)
+    return false;
   S.Stk.push<T>(Ptr.deref<T>());
   return true;
 }
@@ -1926,6 +1929,9 @@ bool LoadPop(InterpState &S, CodePtr OpPC) {
     return false;
   if (!Ptr.isBlockPointer())
     return false;
+  if (const Descriptor *D = Ptr.getFieldDesc();
+      !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name)
+    return false;
   S.Stk.push<T>(Ptr.deref<T>());
   return true;
 }
@@ -3288,12 +3294,18 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
                         bool Fatal) {
   const SourceLocation &Loc = S.Current->getLocation(OpPC);
 
-  if (Kind == CastKind::Reinterpret) {
+  switch (Kind) {
+  case CastKind::Reinterpret:
     S.CCEDiag(Loc, diag::note_constexpr_invalid_cast)
-        << static_cast<unsigned>(Kind) << S.Current->getRange(OpPC);
+        << diag::ConstexprInvalidCastKind::Reinterpret
+        << S.Current->getRange(OpPC);
     return !Fatal;
-  }
-  if (Kind == CastKind::Volatile) {
+  case CastKind::ReinterpretLike:
+    S.CCEDiag(Loc, diag::note_constexpr_invalid_cast)
+        << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
+        << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
+    return !Fatal;
+  case CastKind::Volatile:
     if (!S.checkingPotentialConstantExpression()) {
       const auto *E = cast<CastExpr>(S.Current->getExpr(OpPC));
       if (S.getLangOpts().CPlusPlus)
@@ -3304,14 +3316,13 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
     }
 
     return false;
-  }
-  if (Kind == CastKind::Dynamic) {
+  case CastKind::Dynamic:
     assert(!S.getLangOpts().CPlusPlus20);
-    S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
+    S.CCEDiag(Loc, diag::note_constexpr_invalid_cast)
         << diag::ConstexprInvalidCastKind::Dynamic;
     return true;
   }
-
+  llvm_unreachable("Unhandled CastKind");
   return false;
 }
 
diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h
index 54fd39ac6fcc8..f0454b484ff98 100644
--- a/clang/lib/AST/ByteCode/PrimType.h
+++ b/clang/lib/AST/ByteCode/PrimType.h
@@ -101,6 +101,7 @@ inline constexpr bool isSignedType(PrimType T) {
 
 enum class CastKind : uint8_t {
   Reinterpret,
+  ReinterpretLike,
   Volatile,
   Dynamic,
 };
@@ -111,6 +112,9 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
   case interp::CastKind::Reinterpret:
     OS << "reinterpret_cast";
     break;
+  case interp::CastKind::ReinterpretLike:
+    OS << "reinterpret_like";
+    break;
   case interp::CastKind::Volatile:
     OS << "volatile";
     break;
diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp
index e0b2852f0e906..4d34e0b0a9b46 100644
--- a/clang/lib/AST/ByteCode/Program.cpp
+++ b/clang/lib/AST/ByteCode/Program.cpp
@@ -36,30 +36,19 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) {
   const size_t BitWidth = CharWidth * Ctx.getCharBit();
   unsigned StringLength = S->getLength();
 
-  PrimType CharType;
-  switch (CharWidth) {
-  case 1:
-    CharType = PT_Sint8;
-    break;
-  case 2:
-    CharType = PT_Uint16;
-    break;
-  case 4:
-    CharType = PT_Uint32;
-    break;
-  default:
-    llvm_unreachable("unsupported character width");
-  }
+  OptPrimType CharType =
+      Ctx.classify(S->getType()->castAsArrayTypeUnsafe()->getElementType());
+  assert(CharType);
 
   if (!Base)
     Base = S;
 
   // Create a descriptor for the string.
-  Descriptor *Desc =
-      allocateDescriptor(Base, CharType, Descriptor::GlobalMD, StringLength + 1,
-                         /*isConst=*/true,
-                         /*isTemporary=*/false,
-                         /*isMutable=*/false);
+  Descriptor *Desc = allocateDescriptor(Base, *CharType, Descriptor::GlobalMD,
+                                        StringLength + 1,
+                                        /*isConst=*/true,
+                                        /*isTemporary=*/false,
+                                        /*isMutable=*/false);
 
   // Allocate storage for the string.
   // The byte length does not include the null terminator.
@@ -79,26 +68,9 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) {
   } else {
     // Construct the string in storage.
     for (unsigned I = 0; I <= StringLength; ++I) {
-      const uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I);
-      switch (CharType) {
-      case PT_Sint8: {
-        using T = PrimConv<PT_Sint8>::T;
-        Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
-        break;
-      }
-      case PT_Uint16: {
-        using T = PrimConv<PT_Uint16>::T;
-        Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
-        break;
-      }
-      case PT_Uint32: {
-        using T = PrimConv<PT_Uint32>::T;
-        Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
-        break;
-      }
-      default:
-        llvm_unreachable("unsupported character type");
-      }
+      uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I);
+      INT_TYPE_SWITCH_NO_BOOL(*CharType,
+                              Ptr.elem<T>(I) = T::from(CodePoint, BitWidth););
     }
   }
   Ptr.initializeAllElements();
@@ -218,21 +190,42 @@ UnsignedOrNone Program::createGlobal(const ValueDecl *VD, const Expr *Init) {
     return std::nullopt;
 
   Global *NewGlobal = Globals[*Idx];
+  // Note that this loop has one iteration where Redecl == VD.
   for (const Decl *Redecl : VD->redecls()) {
-    unsigned &PIdx = GlobalIndices[Redecl];
+
+    // If this redecl was registered as a dummy variable, it is now a proper
+    // global variable and points to the block we just created.
+    if (auto DummyIt = DummyVariables.find(Redecl);
+        DummyIt != DummyVariables.end()) {
+      assert(!Globals[DummyIt->second]->block()->hasPointers());
+      Globals[DummyIt->second] = NewGlobal;
+      DummyVariables.erase(DummyIt);
+    }
+    // If the redeclaration hasn't been registered yet at all, we just set its
+    // global index to Idx. If it has been registered yet, it might have
+    // pointers pointing to it and we need to transfer those pointers to the new
+    // block.
+    auto [Iter, Inserted] = GlobalIndices.try_emplace(Redecl);
+    if (Inserted) {
+      GlobalIndices[Redecl] = *Idx;
+      continue;
+    }
+
     if (Redecl != VD) {
-      if (Block *RedeclBlock = Globals[PIdx]->block();
+      if (Block *RedeclBlock = Globals[Iter->second]->block();
           RedeclBlock->isExtern()) {
-        Globals[PIdx] = NewGlobal;
+
         // All pointers pointing to the previous extern decl now point to the
         // new decl.
         // A previous iteration might've already fixed up the pointers for this
         // global.
         if (RedeclBlock != NewGlobal->block())
           RedeclBlock->movePointersTo(NewGlobal->block());
+
+        Globals[Iter->second] = NewGlobal;
       }
     }
-    PIdx = *Idx;
+    Iter->second = *Idx;
   }
 
   return *Idx;
diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h
index 28fcc97f5339d..cc9127dc77860 100644
--- a/clang/lib/AST/ByteCode/Program.h
+++ b/clang/lib/AST/ByteCode/Program.h
@@ -205,7 +205,6 @@ class Program final {
     const Block *block() const { return &B; }
 
   private:
-    /// Required metadata - does not actually track pointers.
     Block B;
   };
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
index 7db6e283ec0a5..cd4c1f0e5b769 100644
--- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
@@ -27,6 +27,7 @@ class AtomicInfo {
   CharUnits atomicAlign;
   CharUnits valueAlign;
   TypeEvaluationKind evaluationKind = cir::TEK_Scalar;
+  bool useLibCall = true;
   LValue lvalue;
   mlir::Location loc;
 
@@ -62,8 +63,8 @@ class AtomicInfo {
       assert(!cir::MissingFeatures::atomicInfo());
       cgf.cgm.errorNYI(loc, "AtomicInfo: non-simple lvalue");
     }
-
-    assert(!cir::MissingFeatures::atomicUseLibCall());
+    useLibCall = !ctx.getTargetInfo().hasBuiltinAtomic(
+        atomicSizeInBits, ctx.toBits(lvalue.getAlignment()));
   }
 
   QualType getValueType() const { return valueTy; }
@@ -75,6 +76,8 @@ class AtomicInfo {
     assert(!cir::MissingFeatures::atomicInfoGetAtomicPointer());
     return nullptr;
   }
+  bool shouldUseLibCall() const { return useLibCall; }
+  const LValue &getAtomicLValue() const { return lvalue; }
   Address getAtomicAddress() const {
     mlir::Type elemTy;
     if (lvalue.isSimple()) {
@@ -96,6 +99,8 @@ class AtomicInfo {
 
   bool emitMemSetZeroIfNecessary() const;
 
+  mlir::Value getScalarRValValueOrNull(RValue rvalue) const;
+
   /// Cast the given pointer to an integer pointer suitable for atomic
   /// operations on the source.
   Address castToAtomicIntPointer(Address addr) const;
@@ -105,6 +110,9 @@ class AtomicInfo {
   /// copy the value across.
   Address convertToAtomicIntPointer(Address addr) const;
 
+  /// Converts a rvalue to integer value.
+  mlir::Value convertRValueToInt(RValue rvalue, bool cmpxchg = false) const;
+
   /// Copy an atomic r-value into atomic-layout memory.
   void emitCopyIntoMemory(RValue rvalue) const;
 
@@ -195,6 +203,12 @@ Address AtomicInfo::createTempAlloca() const {
   return tempAlloca;
 }
 
+mlir::Value AtomicInfo::getScalarRValValueOrNull(RValue rvalue) const {
+  if (rvalue.isScalar() && (!hasPadding() || !lvalue.isSimple()))
+    return rvalue.getValue();
+  return nullptr;
+}
+
 Address AtomicInfo::castToAtomicIntPointer(Address addr) const {
   auto intTy = mlir::dyn_cast<cir::IntType>(addr.getElementType());
   // Don't bother with int casts if the integer size is the same.
@@ -211,10 +225,38 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const {
     return false;
 
   cgf.cgm.errorNYI(loc,
-                   "AtomicInfo::emitMemSetZeroIfNecessary: emit memset zero");
+                   "AtomicInfo::emitMemSetZeroIfNecaessary: emit memset zero");
   return false;
 }
 
+/// Return true if \param valueTy is a type that should be casted to integer
+/// around the atomic memory operation. If \param cmpxchg is true, then the
+/// cast of a floating point type is made as that instruction can not have
+/// floating point operands.  TODO: Allow compare-and-exchange and FP - see
+/// comment in CIRGenAtomicExpandPass.cpp.
+static bool shouldCastToInt(mlir::Type valueTy, bool cmpxchg) {
+  if (cir::isAnyFloatingPointType(valueTy))
+    return isa<cir::FP80Type>(valueTy) || cmpxchg;
+  return !isa<cir::IntType>(valueTy) && !isa<cir::PointerType>(valueTy);
+}
+
+mlir::Value AtomicInfo::convertRValueToInt(RValue rvalue, bool cmpxchg) const {
+  // If we've got a scalar value of the right size, try to avoid going
+  // through memory. Floats get casted if needed by AtomicExpandPass.
+  if (mlir::Value value = getScalarRValValueOrNull(rvalue)) {
+    if (!shouldCastToInt(value.getType(), cmpxchg))
+      return cgf.emitToMemory(value, valueTy);
+
+    cgf.cgm.errorNYI(
+        loc, "AtomicInfo::convertRValueToInt: cast scalar rvalue to int");
+    return nullptr;
+  }
+
+  cgf.cgm.errorNYI(
+      loc, "AtomicInfo::convertRValueToInt: cast non-scalar rvalue to int");
+  return nullptr;
+}
+
 /// Copy an r-value into memory as part of storing to an atomic type.
 /// This needs to create a bit-pattern suitable for atomic operations.
 void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
@@ -815,6 +857,79 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) {
       e->getExprLoc());
 }
 
+void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, bool isInit) {
+  bool isVolatile = dest.isVolatileQualified();
+  auto order = cir::MemOrder::SequentiallyConsistent;
+  if (!dest.getType()->isAtomicType()) {
+    assert(!cir::MissingFeatures::atomicMicrosoftVolatile());
+  }
+  return emitAtomicStore(rvalue, dest, order, isVolatile, isInit);
+}
+
+/// Emit a store to an l-value of atomic type.
+///
+/// Note that the r-value is expected to be an r-value of the atomic type; this
+/// means that for aggregate r-values, it should include storage for any padding
+/// that was necessary.
+void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest,
+                                     cir::MemOrder order, bool isVolatile,
+                                     bool isInit) {
+  // If this is an aggregate r-value, it should agree in type except
+  // maybe for address-space qualification.
+  mlir::Location loc = dest.getPointer().getLoc();
+  assert(!rvalue.isAggregate() ||
+         rvalue.getAggregateAddress().getElementType() ==
+             dest.getAddress().getElementType());
+
+  AtomicInfo atomics(*this, dest, loc);
+  LValue lvalue = atomics.getAtomicLValue();
+
+  if (lvalue.isSimple()) {
+    // If this is an initialization, just put the value there normally.
+    if (isInit) {
+      atomics.emitCopyIntoMemory(rvalue);
+      return;
+    }
+
+    // Check whether we should use a library call.
+    if (atomics.shouldUseLibCall()) {
+      assert(!cir::MissingFeatures::atomicUseLibCall());
+      cgm.errorNYI(loc, "emitAtomicStore: atomic store with library call");
+      return;
+    }
+
+    // Okay, we're doing this natively.
+    mlir::Value valueToStore = atomics.convertRValueToInt(rvalue);
+
+    // Do the atomic store.
+    Address addr = atomics.getAtomicAddress();
+    if (mlir::Value value = atomics.getScalarRValValueOrNull(rvalue)) {
+      if (shouldCastToInt(value.getType(), /*CmpXchg=*/false)) {
+        addr = atomics.castToAtomicIntPointer(addr);
+        valueToStore =
+            builder.createIntCast(valueToStore, addr.getElementType());
+      }
+    }
+    cir::StoreOp store = builder.createStore(loc, valueToStore, addr);
+
+    // Initializations don't need to be atomic.
+    if (!isInit) {
+      assert(!cir::MissingFeatures::atomicOpenMP());
+      store.setMemOrder(order);
+    }
+
+    // Other decoration.
+    if (isVolatile)
+      store.setIsVolatile(true);
+
+    assert(!cir::MissingFeatures::opLoadStoreTbaa());
+    return;
+  }
+
+  cgm.errorNYI(loc, "emitAtomicStore: non-simple atomic lvalue");
+  assert(!cir::MissingFeatures::opLoadStoreAtomic());
+}
+
 void CIRGenFunction::emitAtomicInit(Expr *init, LValue dest) {
   AtomicInfo atomics(*this, dest, getLoc(init->getSourceRange()));
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 4fb178df0e508..422fa1cf5ad2e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -311,7 +311,8 @@ static LValue emitGlobalVarDeclLValue(CIRGenFunction &cgf, const Expr *e,
 
 void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
                                        bool isVolatile, QualType ty,
-                                       bool isInit, bool isNontemporal) {
+                                       LValueBaseInfo baseInfo, bool isInit,
+                                       bool isNontemporal) {
   assert(!cir::MissingFeatures::opLoadStoreThreadLocal());
 
   if (const auto *clangVecTy = ty->getAs<clang::VectorType>()) {
@@ -333,7 +334,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
 
   value = emitToMemory(value, ty);
 
-  assert(!cir::MissingFeatures::opLoadStoreAtomic());
+  assert(!cir::MissingFeatures::opLoadStoreTbaa());
+  LValue atomicLValue = LValue::makeAddr(addr, ty, baseInfo);
+  if (ty->isAtomicType() ||
+      (!isInit && isLValueSuitableForInlineAtomic(atomicLValue))) {
+    emitAtomicStore(RValue::get(value), atomicLValue, isInit);
+    return;
+  }
 
   // Update the alloca with more info on initialization.
   assert(addr.getPointer() && "expected pointer to exist");
@@ -550,7 +557,8 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue,
   }
 
   emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
-                    lvalue.getType(), isInit, /*isNontemporal=*/false);
+                    lvalue.getType(), lvalue.getBaseInfo(), isInit,
+                    /*isNontemporal=*/false);
 }
 
 mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index dece642eb13b6..1c52a78d72e33 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1271,6 +1271,9 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   RValue emitAtomicExpr(AtomicExpr *e);
   void emitAtomicInit(Expr *init, LValue dest);
+  void emitAtomicStore(RValue rvalue, LValue dest, bool isInit);
+  void emitAtomicStore(RValue rvalue, LValue dest, cir::MemOrder order,
+                       bool isVolatile, bool isInit);
 
   AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d,
                                     mlir::OpBuilder::InsertPoint ip = {});
@@ -1680,8 +1683,8 @@ class CIRGenFunction : public CIRGenTypeCache {
                           bool isInit);
 
   void emitStoreOfScalar(mlir::Value value, Address addr, bool isVolatile,
-                         clang::QualType ty, bool isInit = false,
-                         bool isNontemporal = false);
+                         clang::QualType ty, LValueBaseInfo baseInfo,
+                         bool isInit = false, bool isNontemporal = false);
   void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit);
 
   /// Store the specified rvalue into the specified
diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp
index 3df9f9c1d68c7..53ff818a2af53 100644
--- a/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/clang/lib/Sema/SemaDeclObjC.cpp
@@ -4730,13 +4730,13 @@ ParmVarDecl *SemaObjC::ActOnMethodParmDeclaration(Scope *S,
                                                   bool MethodDefinition) {
   ASTContext &Context = getASTContext();
   QualType ArgType;
-  TypeSourceInfo *DI;
+  TypeSourceInfo *TSI;
 
   if (!ArgInfo.Type) {
     ArgType = Context.getObjCIdType();
-    DI = nullptr;
+    TSI = nullptr;
   } else {
-    ArgType = SemaRef.GetTypeFromParser(ArgInfo.Type, &DI);
+    ArgType = SemaRef.GetTypeFromParser(ArgInfo.Type, &TSI);
   }
   LookupResult R(SemaRef, ArgInfo.Name, ArgInfo.NameLoc,
                  Sema::LookupOrdinaryName,
@@ -4753,14 +4753,14 @@ ParmVarDecl *SemaObjC::ActOnMethodParmDeclaration(Scope *S,
     }
   }
   SourceLocation StartLoc =
-      DI ? DI->getTypeLoc().getBeginLoc() : ArgInfo.NameLoc;
+      TSI ? TSI->getTypeLoc().getBeginLoc() : ArgInfo.NameLoc;
 
   // Temporarily put parameter variables in the translation unit. This is what
   // ActOnParamDeclarator does in the case of C arguments to the Objective-C
   // method too.
   ParmVarDecl *Param = SemaRef.CheckParameter(
       Context.getTranslationUnitDecl(), StartLoc, ArgInfo.NameLoc, ArgInfo.Name,
-      ArgType, DI, SC_None);
+      ArgType, TSI, SC_None);
   Param->setObjCMethodScopeInfo(ParamIndex);
   Param->setObjCDeclQualifier(
       CvtQTToAstBitMask(ArgInfo.DeclSpec.getObjCDeclQualifier()));
diff --git a/clang/lib/Sema/SemaFunctionEffects.cpp b/clang/lib/Sema/SemaFunctionEffects.cpp
index 4b63eb7df1054..12cc02965e7d3 100644
--- a/clang/lib/Sema/SemaFunctionEffects.cpp
+++ b/clang/lib/Sema/SemaFunctionEffects.cpp
@@ -1302,6 +1302,14 @@ class Analyzer {
       return true;
     }
 
+    bool TraverseCXXRecordDecl(CXXRecordDecl *D) override {
+      // Completely skip local struct/class/union declarations since their
+      // methods would otherwise be incorrectly interpreted as part of the
+      // function we are currently traversing. The initial Sema pass will have
+      // already recorded any nonblocking methods needing analysis.
+      return true;
+    }
+
     bool TraverseConstructorInitializer(CXXCtorInitializer *Init) override {
       ViolationSite PrevVS = VSite;
       if (Init->isAnyMemberInitializer())
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 2cc65935def53..983a7842ef450 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -949,11 +949,11 @@ static TemplateArgumentLoc translateTemplateArgument(Sema &SemaRef,
 
   switch (Arg.getKind()) {
   case ParsedTemplateArgument::Type: {
-    TypeSourceInfo *DI;
-    QualType T = SemaRef.GetTypeFromParser(Arg.getAsType(), &DI);
-    if (!DI)
-      DI = SemaRef.Context.getTrivialTypeSourceInfo(T, Arg.getNameLoc());
-    return TemplateArgumentLoc(TemplateArgument(T), DI);
+    TypeSourceInfo *TSI;
+    QualType T = SemaRef.GetTypeFromParser(Arg.getAsType(), &TSI);
+    if (!TSI)
+      TSI = SemaRef.Context.getTrivialTypeSourceInfo(T, Arg.getNameLoc());
+    return TemplateArgumentLoc(TemplateArgument(T), TSI);
   }
 
   case ParsedTemplateArgument::NonType: {
@@ -4329,7 +4329,7 @@ void Sema::CheckDeductionGuideTemplate(FunctionTemplateDecl *TD) {
 }
 
 DeclResult Sema::ActOnVarTemplateSpecialization(
-    Scope *S, Declarator &D, TypeSourceInfo *DI, LookupResult &Previous,
+    Scope *S, Declarator &D, TypeSourceInfo *TSI, LookupResult &Previous,
     SourceLocation TemplateKWLoc, TemplateParameterList *TemplateParams,
     StorageClass SC, bool IsPartialSpecialization) {
   // D must be variable template id.
@@ -4455,8 +4455,8 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
     VarTemplatePartialSpecializationDecl *Partial =
         VarTemplatePartialSpecializationDecl::Create(
             Context, VarTemplate->getDeclContext(), TemplateKWLoc,
-            TemplateNameLoc, TemplateParams, VarTemplate, DI->getType(), DI, SC,
-            CTAI.CanonicalConverted);
+            TemplateNameLoc, TemplateParams, VarTemplate, TSI->getType(), TSI,
+            SC, CTAI.CanonicalConverted);
     Partial->setTemplateArgsAsWritten(TemplateArgs);
 
     if (!PrevPartial)
@@ -4474,7 +4474,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
     // this explicit specialization or friend declaration.
     Specialization = VarTemplateSpecializationDecl::Create(
         Context, VarTemplate->getDeclContext(), TemplateKWLoc, TemplateNameLoc,
-        VarTemplate, DI->getType(), DI, SC, CTAI.CanonicalConverted);
+        VarTemplate, TSI->getType(), TSI, SC, CTAI.CanonicalConverted);
     Specialization->setTemplateArgsAsWritten(TemplateArgs);
 
     if (!PrevDecl)
diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
index bfcd3978817ca..40811d4c42e2a 100644
--- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
+++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
@@ -632,34 +632,34 @@ struct ConvertConstructorToDeductionGuideTransform {
       ParmVarDecl *OldParam, MultiLevelTemplateArgumentList &Args,
       llvm::SmallVectorImpl<TypedefNameDecl *> &MaterializedTypedefs,
       bool TransformingOuterPatterns) {
-    TypeSourceInfo *OldDI = OldParam->getTypeSourceInfo();
-    TypeSourceInfo *NewDI;
-    if (auto PackTL = OldDI->getTypeLoc().getAs<PackExpansionTypeLoc>()) {
+    TypeSourceInfo *OldTSI = OldParam->getTypeSourceInfo();
+    TypeSourceInfo *NewTSI;
+    if (auto PackTL = OldTSI->getTypeLoc().getAs<PackExpansionTypeLoc>()) {
       // Expand out the one and only element in each inner pack.
       Sema::ArgPackSubstIndexRAII SubstIndex(SemaRef, 0u);
-      NewDI =
+      NewTSI =
           SemaRef.SubstType(PackTL.getPatternLoc(), Args,
                             OldParam->getLocation(), OldParam->getDeclName());
-      if (!NewDI)
+      if (!NewTSI)
         return nullptr;
-      NewDI =
-          SemaRef.CheckPackExpansion(NewDI, PackTL.getEllipsisLoc(),
+      NewTSI =
+          SemaRef.CheckPackExpansion(NewTSI, PackTL.getEllipsisLoc(),
                                      PackTL.getTypePtr()->getNumExpansions());
     } else
-      NewDI = SemaRef.SubstType(OldDI, Args, OldParam->getLocation(),
-                                OldParam->getDeclName());
-    if (!NewDI)
+      NewTSI = SemaRef.SubstType(OldTSI, Args, OldParam->getLocation(),
+                                 OldParam->getDeclName());
+    if (!NewTSI)
       return nullptr;
 
     // Extract the type. This (for instance) replaces references to typedef
     // members of the current instantiations with the definitions of those
     // typedefs, avoiding triggering instantiation of the deduced type during
     // deduction.
-    NewDI = ExtractTypeForDeductionGuide(
-                SemaRef, MaterializedTypedefs, NestedPattern,
-                TransformingOuterPatterns ? &Args : nullptr)
-                .transform(NewDI);
-    if (!NewDI)
+    NewTSI = ExtractTypeForDeductionGuide(
+                 SemaRef, MaterializedTypedefs, NestedPattern,
+                 TransformingOuterPatterns ? &Args : nullptr)
+                 .transform(NewTSI);
+    if (!NewTSI)
       return nullptr;
     // Resolving a wording defect, we also inherit default arguments from the
     // constructor.
@@ -667,7 +667,7 @@ struct ConvertConstructorToDeductionGuideTransform {
     if (OldParam->hasDefaultArg()) {
       // We don't care what the value is (we won't use it); just create a
       // placeholder to indicate there is a default argument.
-      QualType ParamTy = NewDI->getType();
+      QualType ParamTy = NewTSI->getType();
       NewDefArg = new (SemaRef.Context)
           OpaqueValueExpr(OldParam->getDefaultArgRange().getBegin(),
                           ParamTy.getNonLValueExprType(SemaRef.Context),
@@ -676,13 +676,13 @@ struct ConvertConstructorToDeductionGuideTransform {
                                                              : VK_PRValue);
     }
     // Handle arrays and functions decay.
-    auto NewType = NewDI->getType();
+    auto NewType = NewTSI->getType();
     if (NewType->isArrayType() || NewType->isFunctionType())
       NewType = SemaRef.Context.getDecayedType(NewType);
 
     ParmVarDecl *NewParam = ParmVarDecl::Create(
         SemaRef.Context, DC, OldParam->getInnerLocStart(),
-        OldParam->getLocation(), OldParam->getIdentifier(), NewType, NewDI,
+        OldParam->getLocation(), OldParam->getIdentifier(), NewType, NewTSI,
         OldParam->getStorageClass(), NewDefArg.get());
     NewParam->setScopeInfo(OldParam->getFunctionScopeDepth(),
                            OldParam->getFunctionScopeIndex());
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 7f858050db13e..5fceacd0c00eb 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -3156,25 +3156,25 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
                        const MultiLevelTemplateArgumentList &TemplateArgs,
                        int indexAdjustment, UnsignedOrNone NumExpansions,
                        bool ExpectParameterPack, bool EvaluateConstraint) {
-  TypeSourceInfo *OldDI = OldParm->getTypeSourceInfo();
-  TypeSourceInfo *NewDI = nullptr;
+  TypeSourceInfo *OldTSI = OldParm->getTypeSourceInfo();
+  TypeSourceInfo *NewTSI = nullptr;
 
-  TypeLoc OldTL = OldDI->getTypeLoc();
+  TypeLoc OldTL = OldTSI->getTypeLoc();
   if (PackExpansionTypeLoc ExpansionTL = OldTL.getAs<PackExpansionTypeLoc>()) {
 
     // We have a function parameter pack. Substitute into the pattern of the
     // expansion.
-    NewDI = SubstType(ExpansionTL.getPatternLoc(), TemplateArgs,
-                      OldParm->getLocation(), OldParm->getDeclName());
-    if (!NewDI)
+    NewTSI = SubstType(ExpansionTL.getPatternLoc(), TemplateArgs,
+                       OldParm->getLocation(), OldParm->getDeclName());
+    if (!NewTSI)
       return nullptr;
 
-    if (NewDI->getType()->containsUnexpandedParameterPack()) {
+    if (NewTSI->getType()->containsUnexpandedParameterPack()) {
       // We still have unexpanded parameter packs, which means that
       // our function parameter is still a function parameter pack.
       // Therefore, make its type a pack expansion type.
-      NewDI = CheckPackExpansion(NewDI, ExpansionTL.getEllipsisLoc(),
-                                 NumExpansions);
+      NewTSI = CheckPackExpansion(NewTSI, ExpansionTL.getEllipsisLoc(),
+                                  NumExpansions);
     } else if (ExpectParameterPack) {
       // We expected to get a parameter pack but didn't (because the type
       // itself is not a pack expansion type), so complain. This can occur when
@@ -3182,18 +3182,18 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
       // pack expansion.
       Diag(OldParm->getLocation(),
            diag::err_function_parameter_pack_without_parameter_packs)
-        << NewDI->getType();
+          << NewTSI->getType();
       return nullptr;
     }
   } else {
-    NewDI = SubstType(OldDI, TemplateArgs, OldParm->getLocation(),
-                      OldParm->getDeclName());
+    NewTSI = SubstType(OldTSI, TemplateArgs, OldParm->getLocation(),
+                       OldParm->getDeclName());
   }
 
-  if (!NewDI)
+  if (!NewTSI)
     return nullptr;
 
-  if (NewDI->getType()->isVoidType()) {
+  if (NewTSI->getType()->isVoidType()) {
     Diag(OldParm->getLocation(), diag::err_param_with_void_type);
     return nullptr;
   }
@@ -3205,7 +3205,7 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
   // here, when the instantiated versions of those referenced parameters are in
   // scope.
   if (TemplateTypeParmDecl *TTP =
-          GetContainedInventedTypeParmVisitor().Visit(OldDI->getType())) {
+          GetContainedInventedTypeParmVisitor().Visit(OldTSI->getType())) {
     if (const TypeConstraint *TC = TTP->getTypeConstraint()) {
       auto *Inst = cast_or_null<TemplateTypeParmDecl>(
           FindInstantiatedDecl(TTP->getLocation(), TTP, TemplateArgs));
@@ -3219,12 +3219,10 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
     }
   }
 
-  ParmVarDecl *NewParm = CheckParameter(Context.getTranslationUnitDecl(),
-                                        OldParm->getInnerLocStart(),
-                                        OldParm->getLocation(),
-                                        OldParm->getIdentifier(),
-                                        NewDI->getType(), NewDI,
-                                        OldParm->getStorageClass());
+  ParmVarDecl *NewParm = CheckParameter(
+      Context.getTranslationUnitDecl(), OldParm->getInnerLocStart(),
+      OldParm->getLocation(), OldParm->getIdentifier(), NewTSI->getType(),
+      NewTSI, OldParm->getStorageClass());
   if (!NewParm)
     return nullptr;
 
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 28925cca8f956..681bfe0d8cbf8 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1506,17 +1506,17 @@ TemplateDeclInstantiator::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
 Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
                                                            bool IsTypeAlias) {
   bool Invalid = false;
-  TypeSourceInfo *DI = D->getTypeSourceInfo();
-  if (DI->getType()->isInstantiationDependentType() ||
-      DI->getType()->isVariablyModifiedType()) {
-    DI = SemaRef.SubstType(DI, TemplateArgs,
-                           D->getLocation(), D->getDeclName());
-    if (!DI) {
+  TypeSourceInfo *TSI = D->getTypeSourceInfo();
+  if (TSI->getType()->isInstantiationDependentType() ||
+      TSI->getType()->isVariablyModifiedType()) {
+    TSI = SemaRef.SubstType(TSI, TemplateArgs, D->getLocation(),
+                            D->getDeclName());
+    if (!TSI) {
       Invalid = true;
-      DI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy);
+      TSI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy);
     }
   } else {
-    SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
+    SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), TSI->getType());
   }
 
   // HACK: 2012-10-23 g++ has a bug where it gets the value kind of ?: wrong.
@@ -1525,7 +1525,7 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
   // semantics. See LWG issue 2141 for more information on the bug.  The bugs
   // are fixed in g++ and libstdc++ 4.9.0 (2014-04-22).
   if (SemaRef.getPreprocessor().NeedsStdLibCxxWorkaroundBefore(2014'04'22)) {
-    const DecltypeType *DT = DI->getType()->getAs<DecltypeType>();
+    const DecltypeType *DT = TSI->getType()->getAs<DecltypeType>();
     CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D->getDeclContext());
     if (DT && RD && isa<ConditionalOperator>(DT->getUnderlyingExpr()) &&
         DT->isReferenceType() &&
@@ -1534,18 +1534,18 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
         D->getIdentifier() && D->getIdentifier()->isStr("type") &&
         SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc()))
       // Fold it to the (non-reference) type which g++ would have produced.
-      DI = SemaRef.Context.getTrivialTypeSourceInfo(
-          DI->getType().getNonReferenceType());
+      TSI = SemaRef.Context.getTrivialTypeSourceInfo(
+          TSI->getType().getNonReferenceType());
   }
 
   // Create the new typedef
   TypedefNameDecl *Typedef;
   if (IsTypeAlias)
     Typedef = TypeAliasDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
-                                    D->getLocation(), D->getIdentifier(), DI);
+                                    D->getLocation(), D->getIdentifier(), TSI);
   else
     Typedef = TypedefDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
-                                  D->getLocation(), D->getIdentifier(), DI);
+                                  D->getLocation(), D->getIdentifier(), TSI);
   if (Invalid)
     Typedef->setInvalidDecl();
 
@@ -1554,7 +1554,7 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
   if (const TagType *oldTagType = D->getUnderlyingType()->getAs<TagType>()) {
     TagDecl *oldTag = oldTagType->getDecl();
     if (oldTag->getTypedefNameForAnonDecl() == D && !Invalid) {
-      TagDecl *newTag = DI->getType()->castAs<TagType>()->getDecl();
+      TagDecl *newTag = TSI->getType()->castAs<TagType>()->getDecl();
       assert(!newTag->hasNameForLinkage());
       newTag->setTypedefNameForAnonDecl(Typedef);
     }
@@ -1719,15 +1719,15 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
                                              ArrayRef<BindingDecl*> *Bindings) {
 
   // Do substitution on the type of the declaration
-  TypeSourceInfo *DI = SemaRef.SubstType(
+  TypeSourceInfo *TSI = SemaRef.SubstType(
       D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(),
-      D->getDeclName(), /*AllowDeducedTST*/true);
-  if (!DI)
+      D->getDeclName(), /*AllowDeducedTST*/ true);
+  if (!TSI)
     return nullptr;
 
-  if (DI->getType()->isFunctionType()) {
+  if (TSI->getType()->isFunctionType()) {
     SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function)
-      << D->isStaticDataMember() << DI->getType();
+        << D->isStaticDataMember() << TSI->getType();
     return nullptr;
   }
 
@@ -1739,12 +1739,12 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
   VarDecl *Var;
   if (Bindings)
     Var = DecompositionDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
-                                    D->getLocation(), DI->getType(), DI,
+                                    D->getLocation(), TSI->getType(), TSI,
                                     D->getStorageClass(), *Bindings);
   else
     Var = VarDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
-                          D->getLocation(), D->getIdentifier(), DI->getType(),
-                          DI, D->getStorageClass());
+                          D->getLocation(), D->getIdentifier(), TSI->getType(),
+                          TSI, D->getStorageClass());
 
   // In ARC, infer 'retaining' for variables of retainable type.
   if (SemaRef.getLangOpts().ObjCAutoRefCount &&
@@ -1810,15 +1810,15 @@ Decl *TemplateDeclInstantiator::VisitAccessSpecDecl(AccessSpecDecl *D) {
 
 Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) {
   bool Invalid = false;
-  TypeSourceInfo *DI = D->getTypeSourceInfo();
-  if (DI->getType()->isInstantiationDependentType() ||
-      DI->getType()->isVariablyModifiedType())  {
-    DI = SemaRef.SubstType(DI, TemplateArgs,
-                           D->getLocation(), D->getDeclName());
-    if (!DI) {
-      DI = D->getTypeSourceInfo();
+  TypeSourceInfo *TSI = D->getTypeSourceInfo();
+  if (TSI->getType()->isInstantiationDependentType() ||
+      TSI->getType()->isVariablyModifiedType()) {
+    TSI = SemaRef.SubstType(TSI, TemplateArgs, D->getLocation(),
+                            D->getDeclName());
+    if (!TSI) {
+      TSI = D->getTypeSourceInfo();
       Invalid = true;
-    } else if (DI->getType()->isFunctionType()) {
+    } else if (TSI->getType()->isFunctionType()) {
       // C++ [temp.arg.type]p3:
       //   If a declaration acquires a function type through a type
       //   dependent on a template-parameter and this causes a
@@ -1826,11 +1826,11 @@ Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) {
       //   function declarator to have function type, the program is
       //   ill-formed.
       SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function)
-        << DI->getType();
+          << TSI->getType();
       Invalid = true;
     }
   } else {
-    SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
+    SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), TSI->getType());
   }
 
   Expr *BitWidth = D->getBitWidth();
@@ -1850,16 +1850,10 @@ Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) {
       BitWidth = InstantiatedBitWidth.getAs<Expr>();
   }
 
-  FieldDecl *Field = SemaRef.CheckFieldDecl(D->getDeclName(),
-                                            DI->getType(), DI,
-                                            cast<RecordDecl>(Owner),
-                                            D->getLocation(),
-                                            D->isMutable(),
-                                            BitWidth,
-                                            D->getInClassInitStyle(),
-                                            D->getInnerLocStart(),
-                                            D->getAccess(),
-                                            nullptr);
+  FieldDecl *Field = SemaRef.CheckFieldDecl(
+      D->getDeclName(), TSI->getType(), TSI, cast<RecordDecl>(Owner),
+      D->getLocation(), D->isMutable(), BitWidth, D->getInClassInitStyle(),
+      D->getInnerLocStart(), D->getAccess(), nullptr);
   if (!Field) {
     cast<Decl>(Owner)->setInvalidDecl();
     return nullptr;
@@ -1892,19 +1886,19 @@ Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) {
 
 Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) {
   bool Invalid = false;
-  TypeSourceInfo *DI = D->getTypeSourceInfo();
+  TypeSourceInfo *TSI = D->getTypeSourceInfo();
 
-  if (DI->getType()->isVariablyModifiedType()) {
+  if (TSI->getType()->isVariablyModifiedType()) {
     SemaRef.Diag(D->getLocation(), diag::err_property_is_variably_modified)
       << D;
     Invalid = true;
-  } else if (DI->getType()->isInstantiationDependentType())  {
-    DI = SemaRef.SubstType(DI, TemplateArgs,
-                           D->getLocation(), D->getDeclName());
-    if (!DI) {
-      DI = D->getTypeSourceInfo();
+  } else if (TSI->getType()->isInstantiationDependentType()) {
+    TSI = SemaRef.SubstType(TSI, TemplateArgs, D->getLocation(),
+                            D->getDeclName());
+    if (!TSI) {
+      TSI = D->getTypeSourceInfo();
       Invalid = true;
-    } else if (DI->getType()->isFunctionType()) {
+    } else if (TSI->getType()->isFunctionType()) {
       // C++ [temp.arg.type]p3:
       //   If a declaration acquires a function type through a type
       //   dependent on a template-parameter and this causes a
@@ -1912,16 +1906,17 @@ Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) {
       //   function declarator to have function type, the program is
       //   ill-formed.
       SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function)
-      << DI->getType();
+          << TSI->getType();
       Invalid = true;
     }
   } else {
-    SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
+    SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), TSI->getType());
   }
 
   MSPropertyDecl *Property = MSPropertyDecl::Create(
-      SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), DI->getType(),
-      DI, D->getBeginLoc(), D->getGetterId(), D->getSetterId());
+      SemaRef.Context, Owner, D->getLocation(), D->getDeclName(),
+      TSI->getType(), TSI, D->getBeginLoc(), D->getGetterId(),
+      D->getSetterId());
 
   SemaRef.InstantiateAttrs(TemplateArgs, D, Property, LateAttrs,
                            StartingScope);
@@ -3584,7 +3579,7 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
   SmallVector<TypeSourceInfo *, 4> ExpandedParameterPackTypesAsWritten;
   SmallVector<QualType, 4> ExpandedParameterPackTypes;
   bool IsExpandedParameterPack = false;
-  TypeSourceInfo *DI;
+  TypeSourceInfo *TSI;
   QualType T;
   bool Invalid = false;
 
@@ -3594,24 +3589,24 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
     ExpandedParameterPackTypes.reserve(D->getNumExpansionTypes());
     ExpandedParameterPackTypesAsWritten.reserve(D->getNumExpansionTypes());
     for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
-      TypeSourceInfo *NewDI =
+      TypeSourceInfo *NewTSI =
           SemaRef.SubstType(D->getExpansionTypeSourceInfo(I), TemplateArgs,
                             D->getLocation(), D->getDeclName());
-      if (!NewDI)
+      if (!NewTSI)
         return nullptr;
 
       QualType NewT =
-          SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation());
+          SemaRef.CheckNonTypeTemplateParameterType(NewTSI, D->getLocation());
       if (NewT.isNull())
         return nullptr;
 
-      ExpandedParameterPackTypesAsWritten.push_back(NewDI);
+      ExpandedParameterPackTypesAsWritten.push_back(NewTSI);
       ExpandedParameterPackTypes.push_back(NewT);
     }
 
     IsExpandedParameterPack = true;
-    DI = D->getTypeSourceInfo();
-    T = DI->getType();
+    TSI = D->getTypeSourceInfo();
+    T = TSI->getType();
   } else if (D->isPackExpansion()) {
     // The non-type template parameter pack's type is a pack expansion of types.
     // Determine whether we need to expand this parameter pack into separate
@@ -3637,18 +3632,17 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
     if (Expand) {
       for (unsigned I = 0; I != *NumExpansions; ++I) {
         Sema::ArgPackSubstIndexRAII SubstIndex(SemaRef, I);
-        TypeSourceInfo *NewDI = SemaRef.SubstType(Pattern, TemplateArgs,
-                                                  D->getLocation(),
-                                                  D->getDeclName());
-        if (!NewDI)
+        TypeSourceInfo *NewTSI = SemaRef.SubstType(
+            Pattern, TemplateArgs, D->getLocation(), D->getDeclName());
+        if (!NewTSI)
           return nullptr;
 
         QualType NewT =
-            SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation());
+            SemaRef.CheckNonTypeTemplateParameterType(NewTSI, D->getLocation());
         if (NewT.isNull())
           return nullptr;
 
-        ExpandedParameterPackTypesAsWritten.push_back(NewDI);
+        ExpandedParameterPackTypesAsWritten.push_back(NewTSI);
         ExpandedParameterPackTypes.push_back(NewT);
       }
 
@@ -3656,8 +3650,8 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
       // expanded parameter pack is the original expansion type, but callers
       // will end up using the expanded parameter pack types for type-checking.
       IsExpandedParameterPack = true;
-      DI = D->getTypeSourceInfo();
-      T = DI->getType();
+      TSI = D->getTypeSourceInfo();
+      T = TSI->getType();
     } else {
       // We cannot fully expand the pack expansion now, so substitute into the
       // pattern and create a new pack expansion type.
@@ -3669,22 +3663,22 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
         return nullptr;
 
       SemaRef.CheckNonTypeTemplateParameterType(NewPattern, D->getLocation());
-      DI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(),
-                                      NumExpansions);
-      if (!DI)
+      TSI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(),
+                                       NumExpansions);
+      if (!TSI)
         return nullptr;
 
-      T = DI->getType();
+      T = TSI->getType();
     }
   } else {
     // Simple case: substitution into a parameter that is not a parameter pack.
-    DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
-                           D->getLocation(), D->getDeclName());
-    if (!DI)
+    TSI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
+                            D->getLocation(), D->getDeclName());
+    if (!TSI)
       return nullptr;
 
     // Check that this type is acceptable for a non-type template parameter.
-    T = SemaRef.CheckNonTypeTemplateParameterType(DI, D->getLocation());
+    T = SemaRef.CheckNonTypeTemplateParameterType(TSI, D->getLocation());
     if (T.isNull()) {
       T = SemaRef.Context.IntTy;
       Invalid = true;
@@ -3696,20 +3690,20 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
     Param = NonTypeTemplateParmDecl::Create(
         SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
         D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
-        D->getPosition(), D->getIdentifier(), T, DI, ExpandedParameterPackTypes,
-        ExpandedParameterPackTypesAsWritten);
+        D->getPosition(), D->getIdentifier(), T, TSI,
+        ExpandedParameterPackTypes, ExpandedParameterPackTypesAsWritten);
   else
     Param = NonTypeTemplateParmDecl::Create(
         SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
         D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
-        D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), DI);
+        D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), TSI);
 
-  if (AutoTypeLoc AutoLoc = DI->getTypeLoc().getContainedAutoTypeLoc())
+  if (AutoTypeLoc AutoLoc = TSI->getTypeLoc().getContainedAutoTypeLoc())
     if (AutoLoc.isConstrained()) {
       SourceLocation EllipsisLoc;
       if (IsExpandedParameterPack)
         EllipsisLoc =
-            DI->getTypeLoc().getAs<PackExpansionTypeLoc>().getEllipsisLoc();
+            TSI->getTypeLoc().getAs<PackExpansionTypeLoc>().getEllipsisLoc();
       else if (auto *Constraint = dyn_cast_if_present<CXXFoldExpr>(
                    D->getPlaceholderTypeConstraint()))
         EllipsisLoc = Constraint->getEllipsisLoc();
@@ -4642,22 +4636,22 @@ TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
     VarTemplateSpecializationDecl *PrevDecl) {
 
   // Do substitution on the type of the declaration
-  TypeSourceInfo *DI =
+  TypeSourceInfo *TSI =
       SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
                         D->getTypeSpecStartLoc(), D->getDeclName());
-  if (!DI)
+  if (!TSI)
     return nullptr;
 
-  if (DI->getType()->isFunctionType()) {
+  if (TSI->getType()->isFunctionType()) {
     SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function)
-        << D->isStaticDataMember() << DI->getType();
+        << D->isStaticDataMember() << TSI->getType();
     return nullptr;
   }
 
   // Build the instantiated declaration
   VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create(
       SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
-      VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted);
+      VarTemplate, TSI->getType(), TSI, D->getStorageClass(), Converted);
   if (!PrevDecl) {
     void *InsertPos = nullptr;
     VarTemplate->findSpecialization(Converted, InsertPos);
@@ -5005,16 +4999,16 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
                                              InstParams, InsertPos);
 
   // Do substitution on the type of the declaration
-  TypeSourceInfo *DI = SemaRef.SubstType(
+  TypeSourceInfo *TSI = SemaRef.SubstType(
       PartialSpec->getTypeSourceInfo(), TemplateArgs,
       PartialSpec->getTypeSpecStartLoc(), PartialSpec->getDeclName());
-  if (!DI)
+  if (!TSI)
     return nullptr;
 
-  if (DI->getType()->isFunctionType()) {
+  if (TSI->getType()->isFunctionType()) {
     SemaRef.Diag(PartialSpec->getLocation(),
                  diag::err_variable_instantiates_to_function)
-        << PartialSpec->isStaticDataMember() << DI->getType();
+        << PartialSpec->isStaticDataMember() << TSI->getType();
     return nullptr;
   }
 
@@ -5022,8 +5016,8 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
   VarTemplatePartialSpecializationDecl *InstPartialSpec =
       VarTemplatePartialSpecializationDecl::Create(
           SemaRef.Context, Owner, PartialSpec->getInnerLocStart(),
-          PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(),
-          DI, PartialSpec->getStorageClass(), CTAI.CanonicalConverted);
+          PartialSpec->getLocation(), InstParams, VarTemplate, TSI->getType(),
+          TSI, PartialSpec->getStorageClass(), CTAI.CanonicalConverted);
 
   InstPartialSpec->setTemplateArgsAsWritten(InstTemplateArgs);
 
@@ -6026,14 +6020,14 @@ VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl(
          "don't have a definition to instantiate from");
 
   // Do substitution on the type of the declaration
-  TypeSourceInfo *DI =
+  TypeSourceInfo *TSI =
       SubstType(PatternDecl->getTypeSourceInfo(), TemplateArgs,
                 PatternDecl->getTypeSpecStartLoc(), PatternDecl->getDeclName());
-  if (!DI)
+  if (!TSI)
     return nullptr;
 
   // Update the type of this variable template specialization.
-  VarSpec->setType(DI->getType());
+  VarSpec->setType(TSI->getType());
 
   // Convert the declaration into a definition now.
   VarSpec->setCompleteDefinition();
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index c483930705057..eb8b1352d1be1 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -2795,13 +2795,14 @@ QualType Sema::GetTypeFromParser(ParsedType Ty, TypeSourceInfo **TInfo) {
     return QualType();
   }
 
-  TypeSourceInfo *DI = nullptr;
+  TypeSourceInfo *TSI = nullptr;
   if (const LocInfoType *LIT = dyn_cast<LocInfoType>(QT)) {
     QT = LIT->getType();
-    DI = LIT->getTypeSourceInfo();
+    TSI = LIT->getTypeSourceInfo();
   }
 
-  if (TInfo) *TInfo = DI;
+  if (TInfo)
+    *TInfo = TSI;
   return QT;
 }
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8c20078e97a13..dffd7c1def8e2 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -371,7 +371,7 @@ class TreeTransform {
   /// may override this function (to take over all type
   /// transformations) or some set of the TransformXXXType functions
   /// to alter the transformation.
-  TypeSourceInfo *TransformType(TypeSourceInfo *DI);
+  TypeSourceInfo *TransformType(TypeSourceInfo *TSI);
 
   /// Transform the given type-with-location into a new
   /// type, collecting location information in the given builder
@@ -387,7 +387,7 @@ class TreeTransform {
   /// template arguments.
   /// @{
   QualType TransformTypeWithDeducedTST(QualType T);
-  TypeSourceInfo *TransformTypeWithDeducedTST(TypeSourceInfo *DI);
+  TypeSourceInfo *TransformTypeWithDeducedTST(TypeSourceInfo *TSI);
   /// @}
 
   /// The reason why the value of a statement is not discarded, if any.
@@ -4995,15 +4995,15 @@ bool TreeTransform<Derived>::TransformTemplateArgument(
   }
 
   case TemplateArgument::Type: {
-    TypeSourceInfo *DI = Input.getTypeSourceInfo();
-    if (!DI)
-      DI = InventTypeSourceInfo(Input.getArgument().getAsType());
+    TypeSourceInfo *TSI = Input.getTypeSourceInfo();
+    if (!TSI)
+      TSI = InventTypeSourceInfo(Input.getArgument().getAsType());
 
-    DI = getDerived().TransformType(DI);
-    if (!DI)
+    TSI = getDerived().TransformType(TSI);
+    if (!TSI)
       return true;
 
-    Output = TemplateArgumentLoc(TemplateArgument(DI->getType()), DI);
+    Output = TemplateArgumentLoc(TemplateArgument(TSI->getType()), TSI);
     return false;
   }
 
@@ -5360,28 +5360,28 @@ QualType TreeTransform<Derived>::TransformType(QualType T) {
 
   // Temporary workaround.  All of these transformations should
   // eventually turn into transformations on TypeLocs.
-  TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T,
-                                                getDerived().getBaseLocation());
+  TypeSourceInfo *TSI = getSema().Context.getTrivialTypeSourceInfo(
+      T, getDerived().getBaseLocation());
 
-  TypeSourceInfo *NewDI = getDerived().TransformType(DI);
+  TypeSourceInfo *NewTSI = getDerived().TransformType(TSI);
 
-  if (!NewDI)
+  if (!NewTSI)
     return QualType();
 
-  return NewDI->getType();
+  return NewTSI->getType();
 }
 
-template<typename Derived>
-TypeSourceInfo *TreeTransform<Derived>::TransformType(TypeSourceInfo *DI) {
+template <typename Derived>
+TypeSourceInfo *TreeTransform<Derived>::TransformType(TypeSourceInfo *TSI) {
   // Refine the base location to the type's location.
-  TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(),
+  TemporaryBase Rebase(*this, TSI->getTypeLoc().getBeginLoc(),
                        getDerived().getBaseEntity());
-  if (getDerived().AlreadyTransformed(DI->getType()))
-    return DI;
+  if (getDerived().AlreadyTransformed(TSI->getType()))
+    return TSI;
 
   TypeLocBuilder TLB;
 
-  TypeLoc TL = DI->getTypeLoc();
+  TypeLoc TL = TSI->getTypeLoc();
   TLB.reserve(TL.getFullDataSize());
 
   QualType Result = getDerived().TransformType(TLB, TL);
@@ -5413,27 +5413,27 @@ QualType TreeTransform<Derived>::TransformTypeWithDeducedTST(QualType T) {
 
   if (getDerived().AlreadyTransformed(T))
     return T;
-  TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T,
-                                                getDerived().getBaseLocation());
-  TypeSourceInfo *NewDI = getDerived().TransformTypeWithDeducedTST(DI);
-  return NewDI ? NewDI->getType() : QualType();
+  TypeSourceInfo *TSI = getSema().Context.getTrivialTypeSourceInfo(
+      T, getDerived().getBaseLocation());
+  TypeSourceInfo *NewTSI = getDerived().TransformTypeWithDeducedTST(TSI);
+  return NewTSI ? NewTSI->getType() : QualType();
 }
 
-template<typename Derived>
+template <typename Derived>
 TypeSourceInfo *
-TreeTransform<Derived>::TransformTypeWithDeducedTST(TypeSourceInfo *DI) {
-  if (!isa<DependentNameType>(DI->getType()))
-    return TransformType(DI);
+TreeTransform<Derived>::TransformTypeWithDeducedTST(TypeSourceInfo *TSI) {
+  if (!isa<DependentNameType>(TSI->getType()))
+    return TransformType(TSI);
 
   // Refine the base location to the type's location.
-  TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(),
+  TemporaryBase Rebase(*this, TSI->getTypeLoc().getBeginLoc(),
                        getDerived().getBaseEntity());
-  if (getDerived().AlreadyTransformed(DI->getType()))
-    return DI;
+  if (getDerived().AlreadyTransformed(TSI->getType()))
+    return TSI;
 
   TypeLocBuilder TLB;
 
-  TypeLoc TL = DI->getTypeLoc();
+  TypeLoc TL = TSI->getTypeLoc();
   TLB.reserve(TL.getFullDataSize());
 
   auto QTL = TL.getAs<QualifiedTypeLoc>();
@@ -6258,17 +6258,17 @@ template <typename Derived>
 ParmVarDecl *TreeTransform<Derived>::TransformFunctionTypeParam(
     ParmVarDecl *OldParm, int indexAdjustment, UnsignedOrNone NumExpansions,
     bool ExpectParameterPack) {
-  TypeSourceInfo *OldDI = OldParm->getTypeSourceInfo();
-  TypeSourceInfo *NewDI = nullptr;
+  TypeSourceInfo *OldTSI = OldParm->getTypeSourceInfo();
+  TypeSourceInfo *NewTSI = nullptr;
 
-  if (NumExpansions && isa<PackExpansionType>(OldDI->getType())) {
+  if (NumExpansions && isa<PackExpansionType>(OldTSI->getType())) {
     // If we're substituting into a pack expansion type and we know the
     // length we want to expand to, just substitute for the pattern.
-    TypeLoc OldTL = OldDI->getTypeLoc();
+    TypeLoc OldTL = OldTSI->getTypeLoc();
     PackExpansionTypeLoc OldExpansionTL = OldTL.castAs<PackExpansionTypeLoc>();
 
     TypeLocBuilder TLB;
-    TypeLoc NewTL = OldDI->getTypeLoc();
+    TypeLoc NewTL = OldTSI->getTypeLoc();
     TLB.reserve(NewTL.getFullDataSize());
 
     QualType Result = getDerived().TransformType(TLB,
@@ -6286,24 +6286,20 @@ ParmVarDecl *TreeTransform<Derived>::TransformFunctionTypeParam(
     PackExpansionTypeLoc NewExpansionTL
       = TLB.push<PackExpansionTypeLoc>(Result);
     NewExpansionTL.setEllipsisLoc(OldExpansionTL.getEllipsisLoc());
-    NewDI = TLB.getTypeSourceInfo(SemaRef.Context, Result);
+    NewTSI = TLB.getTypeSourceInfo(SemaRef.Context, Result);
   } else
-    NewDI = getDerived().TransformType(OldDI);
-  if (!NewDI)
+    NewTSI = getDerived().TransformType(OldTSI);
+  if (!NewTSI)
     return nullptr;
 
-  if (NewDI == OldDI && indexAdjustment == 0)
+  if (NewTSI == OldTSI && indexAdjustment == 0)
     return OldParm;
 
-  ParmVarDecl *newParm = ParmVarDecl::Create(SemaRef.Context,
-                                             OldParm->getDeclContext(),
-                                             OldParm->getInnerLocStart(),
-                                             OldParm->getLocation(),
-                                             OldParm->getIdentifier(),
-                                             NewDI->getType(),
-                                             NewDI,
-                                             OldParm->getStorageClass(),
-                                             /* DefArg */ nullptr);
+  ParmVarDecl *newParm = ParmVarDecl::Create(
+      SemaRef.Context, OldParm->getDeclContext(), OldParm->getInnerLocStart(),
+      OldParm->getLocation(), OldParm->getIdentifier(), NewTSI->getType(),
+      NewTSI, OldParm->getStorageClass(),
+      /* DefArg */ nullptr);
   newParm->setScopeInfo(OldParm->getFunctionScopeDepth(),
                         OldParm->getFunctionScopeIndex() + indexAdjustment);
   getDerived().transformedLocalDecl(OldParm, {newParm});
diff --git a/clang/lib/Sema/TypeLocBuilder.h b/clang/lib/Sema/TypeLocBuilder.h
index 0c27088a1748b..e84e79aee8f0d 100644
--- a/clang/lib/Sema/TypeLocBuilder.h
+++ b/clang/lib/Sema/TypeLocBuilder.h
@@ -113,9 +113,9 @@ class TypeLocBuilder {
 #endif
 
     size_t FullDataSize = Capacity - Index;
-    TypeSourceInfo *DI = Context.CreateTypeSourceInfo(T, FullDataSize);
-    memcpy(DI->getTypeLoc().getOpaqueData(), &Buffer[Index], FullDataSize);
-    return DI;
+    TypeSourceInfo *TSI = Context.CreateTypeSourceInfo(T, FullDataSize);
+    memcpy(TSI->getTypeLoc().getOpaqueData(), &Buffer[Index], FullDataSize);
+    return TSI;
   }
 
   /// Copies the type-location information to the given AST context and
diff --git a/clang/test/AST/ByteCode/invalid.cpp b/clang/test/AST/ByteCode/invalid.cpp
index 00db27419e36b..1f2d6bc1d48eb 100644
--- a/clang/test/AST/ByteCode/invalid.cpp
+++ b/clang/test/AST/ByteCode/invalid.cpp
@@ -66,3 +66,26 @@ struct S {
 S s;
 S *sp[2] = {&s, &s};
 S *&spp = sp[1];
+
+namespace InvalidBitCast {
+  void foo() {
+    const long long int i = 1; // both-note {{declared const here}}
+    if (*(double *)&i == 2) {
+      i = 0; // both-error {{cannot assign to variable}}
+    }
+  }
+
+  struct S2 {
+    void *p;
+  };
+  struct T {
+    S2 s;
+  };
+  constexpr T t = {{nullptr}};
+  constexpr void *foo2() { return ((void **)&t)[0]; } // both-error {{never produces a constant expression}} \
+                                                      // both-note 2{{cast that performs the conversions of a reinterpret_cast}}
+  constexpr auto x = foo2(); // both-error {{must be initialized by a constant expression}} \
+                             // both-note {{in call to}}
+
+
+}
diff --git a/clang/test/AST/ByteCode/records.cpp b/clang/test/AST/ByteCode/records.cpp
index 83f32c97c50c7..4799ebe25dde1 100644
--- a/clang/test/AST/ByteCode/records.cpp
+++ b/clang/test/AST/ByteCode/records.cpp
@@ -1882,3 +1882,14 @@ namespace MethodWillHaveBody {
   }
   int n = f(0); // both-note {{instantiation of}}
 }
+
+namespace StaticRedecl {
+  struct T {
+    static T tt;
+    constexpr T() : p(&tt) {}
+    T *p;
+  };
+  T T::tt;
+  constexpr T t;
+  static_assert(t.p == &T::tt, "");
+}
diff --git a/clang/test/AST/ast-dump-arm-attr.c b/clang/test/AST/ast-dump-arm-attr.c
index 78f557d4eb0b1..d26a77d067e97 100644
--- a/clang/test/AST/ast-dump-arm-attr.c
+++ b/clang/test/AST/ast-dump-arm-attr.c
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 -triple arm-apple-darwin -ast-dump -ast-dump-filter Test %s \
 // RUN: | FileCheck --strict-whitespace %s
 //
-// RUN: %clang_cc1 -triple armv8m.base-none-eabi -mcmse -ast-dump -ast-dump-filter Test %s \
+// RUN: %clang_cc1 -triple thumbv8m.base-none-eabi -mcmse -ast-dump -ast-dump-filter Test %s \
 // RUN: | FileCheck --strict-whitespace %s --check-prefix=CHECK-CMSE
 //
 // Tests with serialization:
@@ -11,8 +11,8 @@
 // RUN: | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
 // RUN: | FileCheck --strict-whitespace %s
 //
-// RUN: %clang_cc1 -triple armv8m.base-none-eabi -mcmse -emit-pch -o %t %s
-// RUN: %clang_cc1 -x c -triple armv8m.base-none-eabi -mcmse -include-pch %t -ast-dump-all -ast-dump-filter Test /dev/null \
+// RUN: %clang_cc1 -triple thumbv8m.base-none-eabi -mcmse -emit-pch -o %t %s
+// RUN: %clang_cc1 -x c -triple thumbv8m.base-none-eabi -mcmse -include-pch %t -ast-dump-all -ast-dump-filter Test /dev/null \
 // RUN: | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
 // RUN: | FileCheck --strict-whitespace %s
 
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index 65799881a0cbe..d5bea8446d730 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -46,6 +46,32 @@ void f2(void) {
 // OGCG-NEXT:    store i32 42, ptr %[[SLOT]], align 4
 // OGCG:       }
 
+void f3(_Atomic(int) *p) {
+  *p = 42;
+}
+
+// CIR-LABEL: @f3
+// CIR: cir.store align(4) atomic(seq_cst) %{{.+}}, %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @f3
+// LLVM: store atomic i32 42, ptr %{{.+}} seq_cst, align 4
+
+// OGCG-LABEL: @f3
+// OGCG: store atomic i32 42, ptr %{{.+}} seq_cst, align 4
+
+void f4(_Atomic(float) *p) {
+  *p = 3.14;
+}
+
+// CIR-LABEL: @f4
+// CIR: cir.store align(4) atomic(seq_cst) %{{.+}}, %{{.+}} : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM-LABEL: @f4
+// LLVM: store atomic float 0x40091EB860000000, ptr %{{.+}} seq_cst, align 4
+
+// OGCG-LABEL: @f4
+// OGCG: store atomic float 0x40091EB860000000, ptr %{{.+}} seq_cst, align 4
+
 void load(int *ptr) {
   int x;
   __atomic_load(ptr, &x, __ATOMIC_RELAXED);
diff --git a/clang/test/CodeGen/arm-acle-coproc.c b/clang/test/CodeGen/arm-acle-coproc.c
index 5acb9f65413a0..000fff632f0b7 100644
--- a/clang/test/CodeGen/arm-acle-coproc.c
+++ b/clang/test/CodeGen/arm-acle-coproc.c
@@ -4,10 +4,10 @@
 // RUN: %clang_cc1 -triple armv5te %s -E -dD -o - | FileCheck --check-prefix=CHECK-V5-TE %s
 // RUN: %clang_cc1 -triple armv5tej %s -E -dD -o - | FileCheck --check-prefix=CHECK-V5-TE %s
 // RUN: %clang_cc1 -triple armv6 %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6 %s
-// RUN: %clang_cc1 -triple armv6m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6M %s
+// RUN: %clang_cc1 -triple thumbv6m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6M %s
 // RUN: %clang_cc1 -triple armv7a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s
 // RUN: %clang_cc1 -triple armv7r %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s
-// RUN: %clang_cc1 -triple armv7m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s
+// RUN: %clang_cc1 -triple thumbv7m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s
 // RUN: %clang_cc1 -triple armv8a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s
 // RUN: %clang_cc1 -triple armv8r %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s
 // RUN: %clang_cc1 -triple armv8.1a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s
diff --git a/clang/test/CodeGen/pr45476.cpp b/clang/test/CodeGen/pr45476.cpp
index c95f7fb8cd9c3..3a67904a8e568 100644
--- a/clang/test/CodeGen/pr45476.cpp
+++ b/clang/test/CodeGen/pr45476.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple armv6m-eabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s
+// RUN: %clang_cc1 -triple thumbv6m-eabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s
 // RUN: %clang_cc1 -triple armv8-eabi -emit-llvm %s -o - | FileCheck -check-prefix=NATIVE %s
 // PR45476
 
diff --git a/clang/test/Sema/attr-nonblocking-constraints.cpp b/clang/test/Sema/attr-nonblocking-constraints.cpp
index 881e816292d59..012c017798a1f 100644
--- a/clang/test/Sema/attr-nonblocking-constraints.cpp
+++ b/clang/test/Sema/attr-nonblocking-constraints.cpp
@@ -104,6 +104,25 @@ void nb8c()
 	};
 }
 
+void nb8d() [[clang::nonblocking]]
+{
+	// Blocking methods of a local CXXRecordDecl do not generate diagnostics
+	// for the outer function.
+	struct F1 {
+        void method() { void* ptr = new int; }
+	};
+
+	// Skipping the CXXRecordDecl does not skip a following VarDecl.
+	struct F2 {
+        F2() { void* ptr = new int; } // expected-note {{constructor cannot be inferred 'nonblocking' because it allocates or deallocates memory}}
+	} f2; // expected-warning {{function with 'nonblocking' attribute must not call non-'nonblocking' constructor 'nb8d()::F2::F2'}}
+
+	// Nonblocking methods of a local CXXRecordDecl are verified independently.
+	struct F3 {
+		void method() [[clang::nonblocking]] { void* ptr = new int; }// expected-warning {{function with 'nonblocking' attribute must not allocate or deallocate memory}}
+	};
+}
+
 // Make sure template expansions are found and verified.
 	template <typename T>
 	struct Adder {
diff --git a/clang/test/Sema/builtins-arm-exclusive-124.c b/clang/test/Sema/builtins-arm-exclusive-124.c
index b35ac181f0887..93540879a01ba 100644
--- a/clang/test/Sema/builtins-arm-exclusive-124.c
+++ b/clang/test/Sema/builtins-arm-exclusive-124.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple armv7m -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple armv8m.main -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple thumbv7m -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple thumbv8m.main -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple armv8.1m.main -fsyntax-only -verify %s
 
 // All these architecture versions provide 1-, 2- or 4-byte exclusive accesses,
diff --git a/clang/test/Sema/builtins-arm-exclusive-none.c b/clang/test/Sema/builtins-arm-exclusive-none.c
index 2ef910dd99aaf..25a71e18935a6 100644
--- a/clang/test/Sema/builtins-arm-exclusive-none.c
+++ b/clang/test/Sema/builtins-arm-exclusive-none.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple armv6m -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple thumbv6m -fsyntax-only -verify %s
 
 // Armv6-M does not support exclusive loads/stores at all, so all uses of
 // __builtin_arm_ldrex[d] and __builtin_arm_strex[d] is forbidden.
diff --git a/clang/test/SemaCXX/dllexport.cpp b/clang/test/SemaCXX/dllexport.cpp
index f503e2fc311d1..169af5cacc6c7 100644
--- a/clang/test/SemaCXX/dllexport.cpp
+++ b/clang/test/SemaCXX/dllexport.cpp
@@ -1,13 +1,13 @@
-// RUN: %clang_cc1 -triple i686-win32             -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DMS  %s
-// RUN: %clang_cc1 -triple x86_64-win32           -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DMS  %s
-// RUN: %clang_cc1 -triple i686-mingw32           -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s
-// RUN: %clang_cc1 -triple x86_64-mingw32         -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s
-// RUN: %clang_cc1 -triple i686-pc-cygwin         -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s
-// RUN: %clang_cc1 -triple x86_64-pc-cygwin       -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s
-// RUN: %clang_cc1 -triple i686-windows-itanium   -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI  %s
-// RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI  %s
-// RUN: %clang_cc1 -triple x86_64-scei-ps4        -fsyntax-only -fdeclspec      -verify -std=c++11 -Wunsupported-dll-base-class-template -DPS  %s
-// RUN: %clang_cc1 -triple x86_64-sie-ps5         -fsyntax-only -fdeclspec      -verify -std=c++1y -Wunsupported-dll-base-class-template -DPS  %s
+// RUN: %clang_cc1 -triple i686-win32             -fsyntax-only -fms-extensions -verify=expected,ms,non-gnu,ms-ps        -std=c++11 -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple x86_64-win32           -fsyntax-only -fms-extensions -verify=expected,ms,non-gnu,ms-ps        -std=c++1y -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple i686-mingw32           -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu      -std=c++1y -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple x86_64-mingw32         -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu      -std=c++11 -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple i686-pc-cygwin         -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu      -std=c++1y -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple x86_64-pc-cygwin       -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu      -std=c++11 -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple i686-windows-itanium   -fsyntax-only -fms-extensions -verify=expected,non-ms,non-gnu,win-gnu  -std=c++11 -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify=expected,non-ms,non-gnu,win-gnu  -std=c++1y -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple x86_64-scei-ps4        -fsyntax-only -fdeclspec      -verify=expected,non-ms,non-gnu,ms-ps    -std=c++11 -Wunsupported-dll-base-class-template %s
+// RUN: %clang_cc1 -triple x86_64-sie-ps5         -fsyntax-only -fdeclspec      -verify=expected,non-ms,non-gnu,ms-ps    -std=c++1y -Wunsupported-dll-base-class-template %s
 
 // Helper structs to make templates more expressive.
 struct ImplicitInst_Exported {};
@@ -75,9 +75,7 @@ __declspec(dllexport) extern int GlobalRedecl4; // expected-warning{{redeclarati
 // External linkage is required.
 __declspec(dllexport) static int StaticGlobal; // expected-error{{'StaticGlobal' must have external linkage when declared 'dllexport'}}
 __declspec(dllexport) Internal InternalTypeGlobal; // expected-error{{'InternalTypeGlobal' must have external linkage when declared 'dllexport'}}
-#ifndef MS
-namespace    { __declspec(dllexport) int InternalGlobal; } // expected-error{{'(anonymous namespace)::InternalGlobal' must have external linkage when declared 'dllexport'}}
-#endif
+namespace    { __declspec(dllexport) int InternalGlobal; } // non-ms-error{{'(anonymous namespace)::InternalGlobal' must have external linkage when declared 'dllexport'}}
 namespace ns { __declspec(dllexport) int ExternalGlobal; }
 
 __declspec(dllexport) auto InternalAutoTypeGlobal = Internal(); // expected-error{{'InternalAutoTypeGlobal' must have external linkage when declared 'dllexport'}}
@@ -132,9 +130,7 @@ template<typename T> __declspec(dllexport) extern int VarTmplRedecl3; // expecte
 // External linkage is required.
 template<typename T> __declspec(dllexport) static int StaticVarTmpl; // expected-error{{'StaticVarTmpl' must have external linkage when declared 'dllexport'}}
 template<typename T> __declspec(dllexport) Internal InternalTypeVarTmpl; // expected-error{{'InternalTypeVarTmpl' must have external linkage when declared 'dllexport'}}
-#ifndef MS
-namespace    { template<typename T> __declspec(dllexport) int InternalVarTmpl; } // expected-error{{'(anonymous namespace)::InternalVarTmpl' must have external linkage when declared 'dllexport'}}
-#endif
+namespace    { template<typename T> __declspec(dllexport) int InternalVarTmpl; } // non-ms-error{{'(anonymous namespace)::InternalVarTmpl' must have external linkage when declared 'dllexport'}}
 namespace ns { template<typename T> __declspec(dllexport) int ExternalVarTmpl = 1; }
 
 template<typename T> __declspec(dllexport) auto InternalAutoTypeVarTmpl = Internal(); // expected-error{{'InternalAutoTypeVarTmpl' must have external linkage when declared 'dllexport'}}
@@ -355,11 +351,8 @@ class __declspec(dllexport) ClassDecl;
 
 class __declspec(dllexport) ClassDef {};
 
-#if defined(MS) || defined (WI) || defined(PS)
-// expected-warning@+3{{'dllexport' attribute ignored}}
-#endif
 template <typename T> struct PartiallySpecializedClassTemplate {};
-template <typename T> struct __declspec(dllexport) PartiallySpecializedClassTemplate<T*> { void f() {} };
+template <typename T> struct __declspec(dllexport) PartiallySpecializedClassTemplate<T*> { void f() {} }; // non-gnu-warning {{'dllexport' attribute ignored}}
 
 template <typename T> struct ExpliciallySpecializedClassTemplate {};
 template <> struct __declspec(dllexport) ExpliciallySpecializedClassTemplate<int> { void f() {} };
@@ -373,16 +366,11 @@ ImplicitlyInstantiatedExportedTemplate<IncompleteType> implicitlyInstantiatedExp
 
 // Don't instantiate class members of templates with explicit instantiation declarations, even if they are exported.
 struct IncompleteType2;
-#if defined(MS) || defined (WI) || defined(PS)
-// expected-note@+2{{attribute is here}}
-#endif
-template <typename T> struct __declspec(dllexport) ExportedTemplateWithExplicitInstantiationDecl {
+
+template <typename T> struct __declspec(dllexport) ExportedTemplateWithExplicitInstantiationDecl { // non-gnu-note {{attribute is here}}
   int f() { return sizeof(T); } // no-error
 };
-#if defined(MS) || defined (WI) || defined(PS)
-// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}}
-#endif
-extern template struct ExportedTemplateWithExplicitInstantiationDecl<IncompleteType2>;
+extern template struct ExportedTemplateWithExplicitInstantiationDecl<IncompleteType2>; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}}
 
 // Instantiate class members for explicitly instantiated exported templates.
 struct IncompleteType3; // expected-note{{forward declaration of 'IncompleteType3'}}
@@ -392,16 +380,9 @@ template <typename T> struct __declspec(dllexport) ExplicitlyInstantiatedExporte
 template struct ExplicitlyInstantiatedExportedTemplate<IncompleteType3>; // expected-note{{in instantiation of member function 'ExplicitlyInstantiatedExportedTemplate<IncompleteType3>::f' requested here}}
 
 // In MS mode, instantiate members of class templates that are base classes of exported classes.
-#if defined(MS) || defined(PS)
-  // expected-note@+3{{forward declaration of 'IncompleteType4'}}
-  // expected-note@+3{{in instantiation of member function 'BaseClassTemplateOfExportedClass<IncompleteType4>::f' requested here}}
-#endif
-struct IncompleteType4;
-template <typename T> struct BaseClassTemplateOfExportedClass {
-#if defined(MS) || defined(PS)
-  // expected-error@+2{{invalid application of 'sizeof' to an incomplete type 'IncompleteType4'}}
-#endif
-  int f() { return sizeof(T); };
+struct IncompleteType4; // ms-ps-note {{forward declaration of 'IncompleteType4'}}
+template <typename T> struct BaseClassTemplateOfExportedClass { // ms-ps-note {{in instantiation of member function 'BaseClassTemplateOfExportedClass<IncompleteType4>::f' requested here}}
+  int f() { return sizeof(T); }; // ms-ps-error {{invalid application of 'sizeof' to an incomplete type 'IncompleteType4'}}
 };
 struct __declspec(dllexport) ExportedBaseClass : public BaseClassTemplateOfExportedClass<IncompleteType4> {};
 
@@ -414,17 +395,11 @@ struct __declspec(dllexport) ExportedBaseClass2 : public ExportedBaseClassTempla
 
 // Warn about explicit instantiation declarations of dllexport classes.
 template <typename T> struct ExplicitInstantiationDeclTemplate {};
-#if defined(MS) || defined (WI) || defined(PS)
-// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}} expected-note@+2{{attribute is here}}
-#endif
-extern template struct __declspec(dllexport) ExplicitInstantiationDeclTemplate<int>;
+extern template struct __declspec(dllexport) ExplicitInstantiationDeclTemplate<int>; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}} \
+                                                                                        non-gnu-note {{attribute is here}}
 
-template <typename T> struct __declspec(dllexport) ExplicitInstantiationDeclExportedTemplate {};
-#if defined(MS) || defined (WI) || defined(PS)
-// expected-note@-2{{attribute is here}}
-// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}}
-#endif
-extern template struct ExplicitInstantiationDeclExportedTemplate<int>;
+template <typename T> struct __declspec(dllexport) ExplicitInstantiationDeclExportedTemplate {}; // non-gnu-note {{attribute is here}}
+extern template struct ExplicitInstantiationDeclExportedTemplate<int>; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}}
 
 namespace { struct InternalLinkageType {}; }
 struct __declspec(dllexport) PR23308 {
@@ -440,35 +415,23 @@ class __declspec(dllexport) ExportedClass {};
 class __declspec(dllimport) ImportedClass {};
 
 template <typename T> class ClassTemplate {};
-#if not defined(MS) && not defined(PS)
-// expected-error@+2{{'ExportedClassTemplate<LocalCRTP>' must have external linkage when declared 'dllexport'}}
-#endif
-template <typename T> class __declspec(dllexport) ExportedClassTemplate {};
+template <typename T> class __declspec(dllexport) ExportedClassTemplate {}; // win-gnu-error {{'ExportedClassTemplate<LocalCRTP>' must have external linkage when declared 'dllexport'}}
 template <typename T> class __declspec(dllimport) ImportedClassTemplate {};
 
 template <typename T> struct ExplicitlySpecializedTemplate { void func() {} };
-#if defined(MS) || defined(PS)
-// expected-note@+2{{class template 'ExplicitlySpecializedTemplate<int>' was explicitly specialized here}}
-#endif
-template <> struct ExplicitlySpecializedTemplate<int> { void func() {} };
+template <> struct ExplicitlySpecializedTemplate<int> { void func() {} }; // ms-ps-note {{class template 'ExplicitlySpecializedTemplate<int>' was explicitly specialized here}}
 template <typename T> struct ExplicitlyExportSpecializedTemplate { void func() {} };
 template <> struct __declspec(dllexport) ExplicitlyExportSpecializedTemplate<int> { void func() {} };
 template <typename T> struct ExplicitlyImportSpecializedTemplate { void func() {} };
 template <> struct __declspec(dllimport) ExplicitlyImportSpecializedTemplate<int> { void func() {} };
 
 template <typename T> struct ExplicitlyInstantiatedTemplate { void func() {} };
-#if defined(MS) || defined(PS)
-// expected-note@+2{{class template 'ExplicitlyInstantiatedTemplate<int>' was instantiated here}}
-#endif
-template struct ExplicitlyInstantiatedTemplate<int>;
+template struct ExplicitlyInstantiatedTemplate<int>; // ms-ps-note {{class template 'ExplicitlyInstantiatedTemplate<int>' was instantiated here}}
 template <typename T> struct ExplicitlyExportInstantiatedTemplate { void func() {} };
 template struct __declspec(dllexport) ExplicitlyExportInstantiatedTemplate<int>;
 template <typename T> struct ExplicitlyExportDeclaredInstantiatedTemplate { void func() {} };
 extern template struct ExplicitlyExportDeclaredInstantiatedTemplate<int>;
-#if not defined(MS) && not defined (WI) && not defined(PS)
-// expected-warning@+2{{'dllexport' attribute ignored on explicit instantiation definition}}
-#endif
-template struct __declspec(dllexport) ExplicitlyExportDeclaredInstantiatedTemplate<int>;
+template struct __declspec(dllexport) ExplicitlyExportDeclaredInstantiatedTemplate<int>; // gnu-warning {{'dllexport' attribute ignored on explicit instantiation definition}}
 template <typename T> struct ExplicitlyImportInstantiatedTemplate { void func() {} };
 template struct __declspec(dllimport) ExplicitlyImportInstantiatedTemplate<int>;
 
@@ -496,11 +459,8 @@ class __declspec(dllexport) DerivedFromTemplateB : public ClassTemplate<bool> {}
 // The second derived class doesn't change anything, the attribute that was propagated first wins.
 class __declspec(dllimport) DerivedFromTemplateB2 : public ClassTemplate<bool> {};
 
-#if defined(MS) || defined(PS)
-// expected-warning@+3{{propagating dll attribute to explicitly specialized base class template without dll attribute is not supported}}
-// expected-note@+2{{attribute is here}}
-#endif
-struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate<int> {};
+struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate<int> {}; // ms-ps-warning {{propagating dll attribute to explicitly specialized base class template without dll attribute is not supported}} \
+                                                                                                                         ms-ps-note {{attribute is here}}
 
 // Base class alredy specialized with export attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : public ExplicitlyExportSpecializedTemplate<int> {};
@@ -508,11 +468,8 @@ struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : pu
 // Base class already specialized with import attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyImportSpecializedTemplate : public ExplicitlyImportSpecializedTemplate<int> {};
 
-#if defined(MS) || defined(PS)
-// expected-warning@+3{{propagating dll attribute to already instantiated base class template without dll attribute is not supported}}
-// expected-note@+2{{attribute is here}}
-#endif
-struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate<int> {};
+struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate<int> {}; // ms-ps-warning {{propagating dll attribute to already instantiated base class template without dll attribute is not supported}} \
+                                                                                                                           ms-ps-note {{attribute is here}}
 
 // Base class already instantiated with export attribute.
 struct __declspec(dllexport) DerivedFromExplicitlyExportInstantiatedTemplate : public ExplicitlyExportInstantiatedTemplate<int> {};
@@ -528,10 +485,7 @@ void func() {
   // MSVC allows deriving from exported template classes in local contexts.
   class LocalDerivedFromExportedClass : public ExportedClass {};
   class LocalDerivedFromExportedTemplate : public ExportedClassTemplate<int> {};
-#if not defined(MS) && not defined (PS)
-  // expected-note@+2{{in instantiation of template class 'ExportedClassTemplate<LocalCRTP>' requested here}}
-#endif
-  class LocalCRTP : public ExportedClassTemplate<LocalCRTP> {};
+  class LocalCRTP : public ExportedClassTemplate<LocalCRTP> {}; // win-gnu-note {{in instantiation of template class 'ExportedClassTemplate<LocalCRTP>' requested here}}
 }
 
 //===----------------------------------------------------------------------===//
@@ -778,46 +732,40 @@ __declspec(dllexport)        void MemberRedecl::staticInlineDecl() {}  // expect
 
 __declspec(dllexport)        int  MemberRedecl::StaticField = 1;       // expected-error{{redeclaration of 'MemberRedecl::StaticField' cannot add 'dllexport' attribute}}
 __declspec(dllexport) const  int  MemberRedecl::StaticConstField = 1;  // expected-error{{redeclaration of 'MemberRedecl::StaticConstField' cannot add 'dllexport' attribute}}
-#ifdef MS
-// expected-warning@+4{{attribute declaration must precede definition}}
-#else
-// expected-error@+2{{redeclaration of 'MemberRedecl::ConstexprField' cannot add 'dllexport' attribute}}
-#endif
-__declspec(dllexport) constexpr int MemberRedecl::ConstexprField;
 
-#ifdef MS
+__declspec(dllexport) constexpr int MemberRedecl::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \
+                                                                     non-ms-error {{redeclaration of 'MemberRedecl::ConstexprField' cannot add 'dllexport' attribute}}
+
 struct __declspec(dllexport) ClassWithMultipleDefaultCtors {
-  ClassWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}}
-  ClassWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}}
+  ClassWithMultipleDefaultCtors(int = 40) {} // ms-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}}
+  ClassWithMultipleDefaultCtors(int = 30, ...) {} // ms-note{{declared here}}
 };
 template <typename T>
 struct ClassTemplateWithMultipleDefaultCtors {
-  __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 40) {}      // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}}
-  __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}}
+  __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 40) {}      // ms-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}}
+  __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 30, ...) {} // ms-note{{declared here}}
 };
 
 template <typename T> struct HasDefaults {
-  HasDefaults(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}}
+  HasDefaults(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}}
 };
 template struct __declspec(dllexport) HasDefaults<char>;
 
 template struct
-__declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults<void>' required here}}
-HasDefaults<void>; // expected-note {{in instantiation of member function 'HasDefaults<void>::HasDefaults' requested here}}
+__declspec(dllexport) // ms-note {{in instantiation of default function argument expression for 'HasDefaults<void>' required here}}
+HasDefaults<void>; // ms-note {{in instantiation of member function 'HasDefaults<void>::HasDefaults' requested here}}
 
 template <typename T> struct HasDefaults2 {
-  __declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults2<void>' required here}}
-  HasDefaults2(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}}
+  __declspec(dllexport) // ms-note {{in instantiation of default function argument expression for 'HasDefaults2<void>' required here}}
+  HasDefaults2(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}}
 };
-template struct HasDefaults2<void>; // expected-note {{in instantiation of member function 'HasDefaults2<void>::HasDefaults2' requested here}}
+template struct HasDefaults2<void>; // ms-note {{in instantiation of member function 'HasDefaults2<void>::HasDefaults2' requested here}}
 
-template <typename T> struct __declspec(dllexport) HasDefaults3 { // expected-note{{in instantiation of default function argument expression for 'HasDefaults3<void>' required here}}
-  HasDefaults3(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}}
+template <typename T> struct __declspec(dllexport) HasDefaults3 { // ms-note{{in instantiation of default function argument expression for 'HasDefaults3<void>' required here}}
+  HasDefaults3(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}}
 };
 template <> HasDefaults3<void>::HasDefaults3(int) {};
 
-#endif
-
 //===----------------------------------------------------------------------===//
 // Class member templates
 //===----------------------------------------------------------------------===//
@@ -887,12 +835,8 @@ template<typename T> __declspec(dllexport)        void MemTmplRedecl::staticInli
 template<typename T> __declspec(dllexport)        int  MemTmplRedecl::StaticField = 1;      // expected-error{{redeclaration of 'MemTmplRedecl::StaticField' cannot add 'dllexport' attribute}}
 template<typename T> __declspec(dllexport) const  int  MemTmplRedecl::StaticConstField = 1; // expected-error{{redeclaration of 'MemTmplRedecl::StaticConstField' cannot add 'dllexport' attribute}}
 
-#ifdef MS
-// expected-warning@+4{{attribute declaration must precede definition}}
-#else
-// expected-error@+2{{redeclaration of 'MemTmplRedecl::ConstexprField' cannot add 'dllexport' attribute}}
-#endif
-template<typename T> __declspec(dllexport) constexpr int MemTmplRedecl::ConstexprField;
+template<typename T> __declspec(dllexport) constexpr int MemTmplRedecl::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \
+                                                                                           non-ms-error {{redeclaration of 'MemTmplRedecl::ConstexprField' cannot add 'dllexport' attribute}}
 #endif // __has_feature(cxx_variable_templates)
 
 
@@ -1097,20 +1041,13 @@ template<typename T> __declspec(dllexport)        void CTMR<T>::staticInlineDecl
 
 template<typename T> __declspec(dllexport)        int  CTMR<T>::StaticField = 1;       // expected-error{{redeclaration of 'CTMR::StaticField' cannot add 'dllexport' attribute}}
 template<typename T> __declspec(dllexport) const  int  CTMR<T>::StaticConstField = 1;  // expected-error{{redeclaration of 'CTMR::StaticConstField' cannot add 'dllexport' attribute}}
-#ifdef MS
-// expected-warning@+4{{attribute declaration must precede definition}}
-#else
-// expected-error@+2{{redeclaration of 'CTMR::ConstexprField' cannot add 'dllexport' attribute}}
-#endif
-template<typename T> __declspec(dllexport) constexpr int CTMR<T>::ConstexprField;
+template<typename T> __declspec(dllexport) constexpr int CTMR<T>::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \
+                                                                                     non-ms-error {{redeclaration of 'CTMR::ConstexprField' cannot add 'dllexport' attribute}}
 
 // MSVC exports explicit specialization of exported class template member
 // function, and errors on such definitions. MinGW does not treat them as
 // dllexport.
-#if !defined(GNU)
-// expected-error@+2{{attribute 'dllexport' cannot be applied to a deleted function}}
-#endif
-template <> void ExportClassTmplMembers<int>::normalDecl() = delete;
+template <> void ExportClassTmplMembers<int>::normalDecl() = delete; // non-gnu-error {{attribute 'dllexport' cannot be applied to a deleted function}}
 
 
 //===----------------------------------------------------------------------===//
@@ -1183,12 +1120,8 @@ template<typename T> template<typename U> __declspec(dllexport)        void CTMT
 #if __has_feature(cxx_variable_templates)
 template<typename T> template<typename U> __declspec(dllexport)        int  CTMTR<T>::StaticField = 1;       // expected-error{{redeclaration of 'CTMTR::StaticField' cannot add 'dllexport' attribute}}
 template<typename T> template<typename U> __declspec(dllexport) const  int  CTMTR<T>::StaticConstField = 1;  // expected-error{{redeclaration of 'CTMTR::StaticConstField' cannot add 'dllexport' attribute}}
-#ifdef MS
-// expected-warning@+4{{attribute declaration must precede definition}}
-#else
-// expected-error@+2{{redeclaration of 'CTMTR::ConstexprField' cannot add 'dllexport' attribute}}
-#endif
-template<typename T> template<typename U> __declspec(dllexport) constexpr int CTMTR<T>::ConstexprField;
+template<typename T> template<typename U> __declspec(dllexport) constexpr int CTMTR<T>::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \
+                                                                                                           non-ms-error {{redeclaration of 'CTMTR::ConstexprField' cannot add 'dllexport' attribute}}
 #endif // __has_feature(cxx_variable_templates)
 
 // FIXME: Precedence rules seem to be different for classes.
@@ -1197,7 +1130,4 @@ template<typename T> template<typename U> __declspec(dllexport) constexpr int CT
 // Lambdas
 //===----------------------------------------------------------------------===//
 // The MS ABI doesn't provide a stable mangling for lambdas, so they can't be imported or exported.
-#if defined(MS) || defined (WI) || defined(PS)
-// expected-error@+2{{lambda cannot be declared 'dllexport'}}
-#endif
-auto Lambda = []() __declspec(dllexport) -> bool { return true; };
+auto Lambda = []() __declspec(dllexport) -> bool { return true; }; // non-gnu-error {{lambda cannot be declared 'dllexport'}}
diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp
index ef58da61e371b..bb55a8163d938 100644
--- a/flang/examples/FeatureList/FeatureList.cpp
+++ b/flang/examples/FeatureList/FeatureList.cpp
@@ -348,6 +348,7 @@ struct NodeVisitor {
   READ_FEATURE(TeamValue)
   READ_FEATURE(ImageSelector)
   READ_FEATURE(ImageSelectorSpec)
+  READ_FEATURE(ImageSelectorSpec::Notify)
   READ_FEATURE(ImageSelectorSpec::Stat)
   READ_FEATURE(ImageSelectorSpec::Team_Number)
   READ_FEATURE(ImplicitPart)
diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h
index 48aafa8982559..d63c16f93230a 100644
--- a/flang/include/flang/Evaluate/traverse.h
+++ b/flang/include/flang/Evaluate/traverse.h
@@ -146,7 +146,7 @@ class Traverse {
     return Combine(x.base(), x.subscript());
   }
   Result operator()(const CoarrayRef &x) const {
-    return Combine(x.base(), x.cosubscript(), x.stat(), x.team());
+    return Combine(x.base(), x.cosubscript(), x.notify(), x.stat(), x.team());
   }
   Result operator()(const DataRef &x) const { return visitor_(x.u); }
   Result operator()(const Substring &x) const {
diff --git a/flang/include/flang/Evaluate/variable.h b/flang/include/flang/Evaluate/variable.h
index 5c14421fd3a1b..4f64ede3d407d 100644
--- a/flang/include/flang/Evaluate/variable.h
+++ b/flang/include/flang/Evaluate/variable.h
@@ -260,6 +260,9 @@ class CoarrayRef {
   // it's TEAM=.
   std::optional<Expr<SomeType>> team() const;
   CoarrayRef &set_team(Expr<SomeType> &&);
+  // When notify() is Expr<Some>, it's NOTIFY=.
+  std::optional<Expr<SomeType>> notify() const;
+  CoarrayRef &set_notify(Expr<SomeType> &&);
 
   int Rank() const;
   int Corank() const { return 0; }
@@ -272,6 +275,7 @@ class CoarrayRef {
 private:
   common::CopyableIndirection<DataRef> base_;
   std::vector<Expr<SubscriptInteger>> cosubscript_;
+  std::optional<common::CopyableIndirection<Expr<SomeType>>> notify_;
   std::optional<common::CopyableIndirection<Expr<SomeInteger>>> stat_;
   std::optional<common::CopyableIndirection<Expr<SomeType>>> team_;
 };
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index de2716410d6cd..b2424023b0168 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -387,6 +387,7 @@ class ParseTreeDumper {
   NODE(parser, TeamValue)
   NODE(parser, ImageSelector)
   NODE(parser, ImageSelectorSpec)
+  NODE(ImageSelectorSpec, Notify)
   NODE(ImageSelectorSpec, Stat)
   NODE(ImageSelectorSpec, Team_Number)
   NODE(parser, ImplicitPart)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 8c7578f7a1941..32e444fbb2e6c 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -1684,13 +1684,15 @@ using Cosubscript = ScalarIntExpr;
 WRAPPER_CLASS(TeamValue, Scalar<common::Indirection<Expr>>);
 
 // R926 image-selector-spec ->
+//        NOTIFY = notify-variable |
 //        STAT = stat-variable | TEAM = team-value |
 //        TEAM_NUMBER = scalar-int-expr
 struct ImageSelectorSpec {
   WRAPPER_CLASS(Stat, Scalar<Integer<common::Indirection<Variable>>>);
   WRAPPER_CLASS(Team_Number, ScalarIntExpr);
+  WRAPPER_CLASS(Notify, Scalar<common::Indirection<Variable>>);
   UNION_CLASS_BOILERPLATE(ImageSelectorSpec);
-  std::variant<Stat, TeamValue, Team_Number> u;
+  std::variant<Notify, Stat, TeamValue, Team_Number> u;
 };
 
 // R924 image-selector ->
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 8a7b9867c0979..1c3477013b559 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -107,6 +107,7 @@ bool IsBindCProcedure(const Scope &);
 // Returns a pointer to the function's symbol when true, else null
 const Symbol *IsFunctionResultWithSameNameAsFunction(const Symbol &);
 bool IsOrContainsEventOrLockComponent(const Symbol &);
+bool IsOrContainsNotifyComponent(const Symbol &);
 bool CanBeTypeBoundProc(const Symbol &);
 // Does a non-PARAMETER symbol have explicit initialization with =value or
 // =>target in its declaration (but not in a DATA statement)? (Being
@@ -652,6 +653,8 @@ using PotentialAndPointerComponentIterator =
 // dereferenced.
 PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent(
     const DerivedTypeSpec &, bool ignoreCoarrays = false);
+PotentialComponentIterator::const_iterator FindNotifyPotentialComponent(
+    const DerivedTypeSpec &, bool ignoreCoarrays = false);
 PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent(
     const DerivedTypeSpec &);
 PotentialAndPointerComponentIterator::const_iterator
diff --git a/flang/lib/Evaluate/variable.cpp b/flang/lib/Evaluate/variable.cpp
index b9b34d4d5bc89..b257dad42fc58 100644
--- a/flang/lib/Evaluate/variable.cpp
+++ b/flang/lib/Evaluate/variable.cpp
@@ -89,6 +89,14 @@ std::optional<Expr<SomeType>> CoarrayRef::team() const {
   }
 }
 
+std::optional<Expr<SomeType>> CoarrayRef::notify() const {
+  if (notify_) {
+    return notify_.value().value();
+  } else {
+    return std::nullopt;
+  }
+}
+
 CoarrayRef &CoarrayRef::set_stat(Expr<SomeInteger> &&v) {
   CHECK(IsVariable(v));
   stat_.emplace(std::move(v));
@@ -100,6 +108,11 @@ CoarrayRef &CoarrayRef::set_team(Expr<SomeType> &&v) {
   return *this;
 }
 
+CoarrayRef &CoarrayRef::set_notify(Expr<SomeType> &&v) {
+  notify_.emplace(std::move(v));
+  return *this;
+}
+
 const Symbol &CoarrayRef::GetFirstSymbol() const {
   return base().GetFirstSymbol();
 }
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ad456d89bc432..51170a39d272b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1763,21 +1763,22 @@ static void genTaskgroupClauses(
   cp.processTaskReduction(loc, clauseOps, taskReductionSyms);
 }
 
-static void genTaskloopClauses(lower::AbstractConverter &converter,
-                               semantics::SemanticsContext &semaCtx,
-                               lower::StatementContext &stmtCtx,
-                               const List<Clause> &clauses, mlir::Location loc,
-                               mlir::omp::TaskloopOperands &clauseOps) {
+static void genTaskloopClauses(
+    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
+    lower::StatementContext &stmtCtx, const List<Clause> &clauses,
+    mlir::Location loc, mlir::omp::TaskloopOperands &clauseOps,
+    llvm::SmallVectorImpl<const semantics::Symbol *> &taskReductionSyms) {
 
   ClauseProcessor cp(converter, semaCtx, clauses);
   cp.processGrainsize(stmtCtx, clauseOps);
   cp.processNumTasks(stmtCtx, clauseOps);
+  cp.processReduction(loc, clauseOps, taskReductionSyms);
 
   cp.processTODO<clause::Allocate, clause::Collapse, clause::Default,
                  clause::Final, clause::If, clause::InReduction,
                  clause::Lastprivate, clause::Mergeable, clause::Nogroup,
-                 clause::Priority, clause::Reduction, clause::Shared,
-                 clause::Untied>(loc, llvm::omp::Directive::OMPD_taskloop);
+                 clause::Priority, clause::Shared, clause::Untied>(
+      loc, llvm::omp::Directive::OMPD_taskloop);
 }
 
 static void genTaskwaitClauses(lower::AbstractConverter &converter,
@@ -2979,8 +2980,9 @@ static mlir::omp::TaskloopOp genStandaloneTaskloop(
     lower::pft::Evaluation &eval, mlir::Location loc,
     const ConstructQueue &queue, ConstructQueue::const_iterator item) {
   mlir::omp::TaskloopOperands taskloopClauseOps;
+  llvm::SmallVector<const semantics::Symbol *> taskReductionSyms;
   genTaskloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
-                     taskloopClauseOps);
+                     taskloopClauseOps, taskReductionSyms);
   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                            /*shouldCollectPreDeterminedSymbols=*/true,
                            enableDelayedPrivatization, symTable);
@@ -2994,6 +2996,8 @@ static mlir::omp::TaskloopOp genStandaloneTaskloop(
   EntryBlockArgs taskloopArgs;
   taskloopArgs.priv.syms = dsp.getDelayedPrivSymbols();
   taskloopArgs.priv.vars = taskloopClauseOps.privateVars;
+  taskloopArgs.reduction.syms = taskReductionSyms;
+  taskloopArgs.reduction.vars = taskloopClauseOps.reductionVars;
 
   auto taskLoopOp = genWrapperOp<mlir::omp::TaskloopOp>(
       converter, loc, taskloopClauseOps, taskloopArgs);
diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp
index 1b4d37e9798a9..4b95a3adf052a 100644
--- a/flang/lib/Lower/Support/Utils.cpp
+++ b/flang/lib/Lower/Support/Utils.cpp
@@ -82,7 +82,7 @@ class HashEvaluateExpr {
          x.cosubscript())
       cosubs -= getHashValue(v);
     return getHashValue(x.base()) * 97u - cosubs + getHashValue(x.stat()) +
-           257u + getHashValue(x.team());
+           257u + getHashValue(x.team()) + getHashValue(x.notify());
   }
   static unsigned getHashValue(const Fortran::evaluate::NamedEntity &x) {
     if (x.IsSymbol())
@@ -341,7 +341,8 @@ class IsEqualEvaluateExpr {
                       const Fortran::evaluate::CoarrayRef &y) {
     return isEqual(x.base(), y.base()) &&
            isEqual(x.cosubscript(), y.cosubscript()) &&
-           isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team());
+           isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team()) &&
+           isEqual(x.notify(), y.notify());
   }
   static bool isEqual(const Fortran::evaluate::NamedEntity &x,
                       const Fortran::evaluate::NamedEntity &y) {
diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp
index 59fe7d813d96a..ea6a1eada2741 100644
--- a/flang/lib/Parser/Fortran-parsers.cpp
+++ b/flang/lib/Parser/Fortran-parsers.cpp
@@ -1212,12 +1212,15 @@ TYPE_CONTEXT_PARSER("image selector"_en_US,
 
 // R926 image-selector-spec ->
 //        STAT = stat-variable | TEAM = team-value |
-//        TEAM_NUMBER = scalar-int-expr
+//        TEAM_NUMBER = scalar-int-expr |
+//        NOTIFY = notify-variable
 TYPE_PARSER(construct<ImageSelectorSpec>(construct<ImageSelectorSpec::Stat>(
                 "STAT =" >> scalar(integer(indirect(variable))))) ||
     construct<ImageSelectorSpec>(construct<TeamValue>("TEAM =" >> teamValue)) ||
     construct<ImageSelectorSpec>(construct<ImageSelectorSpec::Team_Number>(
-        "TEAM_NUMBER =" >> scalarIntExpr)))
+        "TEAM_NUMBER =" >> scalarIntExpr)) ||
+    construct<ImageSelectorSpec>(construct<ImageSelectorSpec::Notify>(
+        "NOTIFY =" >> scalar(indirect(variable)))))
 
 // R927 allocate-stmt ->
 //        ALLOCATE ( [type-spec ::] allocation-list [, alloc-opt-list] )
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 84123030195e9..6bb14a43e7b99 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -819,6 +819,7 @@ class UnparseVisitor {
       Word("TEAM=");
     }
   }
+  void Before(const ImageSelectorSpec::Notify &) { Word("NOTIFY="); }
   void Unparse(const AllocateStmt &x) { // R927
     Word("ALLOCATE(");
     Walk(std::get<std::optional<TypeSpec>>(x.t), "::");
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index de407d3b1e125..9a6b3ff3cdc2c 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -855,6 +855,15 @@ void CheckHelper::CheckObjectEntity(
         messages_.Say(
             "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US,
             symbol.name(), component.BuildResultDesignatorName());
+      } else if (IsNotifyType(derived)) { // C1612
+        messages_.Say(
+            "Variable '%s' with NOTIFY_TYPE must be a coarray"_err_en_US,
+            symbol.name());
+      } else if (auto component{FindNotifyPotentialComponent( // C1611
+                     *derived, /*ignoreCoarrays=*/true)}) {
+        messages_.Say(
+            "Variable '%s' with NOTIFY_TYPE potential component '%s' must be a coarray"_err_en_US,
+            symbol.name(), component.BuildResultDesignatorName());
       }
     }
   }
@@ -873,6 +882,10 @@ void CheckHelper::CheckObjectEntity(
         messages_.Say(
             "An INTENT(OUT) dummy argument may not be, or contain, EVENT_TYPE or LOCK_TYPE"_err_en_US);
       }
+      if (IsOrContainsNotifyComponent(symbol)) { // C1613
+        messages_.Say(
+            "An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE"_err_en_US);
+      }
       if (IsAssumedSizeArray(symbol)) { // C834
         if (type && type->IsPolymorphic()) {
           messages_.Say(
diff --git a/flang/lib/Semantics/dump-expr.cpp b/flang/lib/Semantics/dump-expr.cpp
index 66cedab94bfb4..8d354cf65b61e 100644
--- a/flang/lib/Semantics/dump-expr.cpp
+++ b/flang/lib/Semantics/dump-expr.cpp
@@ -23,6 +23,7 @@ void DumpEvaluateExpr::Show(const evaluate::CoarrayRef &x) {
   Indent("coarray ref");
   Show(x.base());
   Show(x.cosubscript());
+  Show(x.notify());
   Show(x.stat());
   Show(x.team());
   Outdent();
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index c8167fd34f666..ac58dfc005f17 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -1579,6 +1579,19 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::CoindexedNamedObject &x) {
         std::get<std::list<parser::ImageSelectorSpec>>(x.imageSelector.t)) {
       common::visit(
           common::visitors{
+              [&](const parser::ImageSelectorSpec::Notify &x) {
+                Analyze(x.v);
+                if (const auto *expr{GetExpr(context_, x.v)}) {
+                  if (coarrayRef.notify()) {
+                    Say("coindexed reference has multiple NOTIFY= specifiers"_err_en_US);
+                  } else if (auto dyType{expr->GetType()};
+                      dyType && IsNotifyType(GetDerivedTypeSpec(*dyType))) {
+                    coarrayRef.set_notify(Expr<SomeType>{*expr});
+                  } else {
+                    Say("NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV"_err_en_US);
+                  }
+                }
+              },
               [&](const parser::ImageSelectorSpec::Stat &x) {
                 Analyze(x.v);
                 if (const auto *expr{GetExpr(context_, x.v)}) {
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 8eddd03faa962..cf1e5e7d44565 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -582,6 +582,18 @@ bool IsOrContainsEventOrLockComponent(const Symbol &original) {
   return false;
 }
 
+bool IsOrContainsNotifyComponent(const Symbol &original) {
+  const Symbol &symbol{ResolveAssociations(original, /*stopAtTypeGuard=*/true)};
+  if (evaluate::IsVariable(symbol)) {
+    if (const DeclTypeSpec *type{symbol.GetType()}) {
+      if (const DerivedTypeSpec *derived{type->AsDerived()}) {
+        return IsNotifyType(derived) || FindNotifyPotentialComponent(*derived);
+      }
+    }
+  }
+  return false;
+}
+
 // Check this symbol suitable as a type-bound procedure - C769
 bool CanBeTypeBoundProc(const Symbol &symbol) {
   if (IsDummy(symbol) || IsProcedurePointer(symbol)) {
@@ -1489,6 +1501,32 @@ PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent(
   return iter;
 }
 
+PotentialComponentIterator::const_iterator FindNotifyPotentialComponent(
+    const DerivedTypeSpec &derived, bool ignoreCoarrays) {
+  PotentialComponentIterator potentials{derived};
+  auto iter{potentials.begin()};
+  for (auto end{potentials.end()}; iter != end; ++iter) {
+    const Symbol &component{*iter};
+    if (const auto *object{component.detailsIf<ObjectEntityDetails>()}) {
+      if (const DeclTypeSpec *type{object->type()}) {
+        if (IsNotifyType(type->AsDerived())) {
+          if (!ignoreCoarrays) {
+            break; // found one
+          }
+          auto path{iter.GetComponentPath()};
+          path.pop_back();
+          if (std::find_if(path.begin(), path.end(), [](const Symbol &sym) {
+                return evaluate::IsCoarray(sym);
+              }) == path.end()) {
+            break; // found one not in a coarray
+          }
+        }
+      }
+    }
+  }
+  return iter;
+}
+
 UltimateComponentIterator::const_iterator FindAllocatableUltimateComponent(
     const DerivedTypeSpec &derived) {
   UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 b/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90
deleted file mode 100644
index 0c16bd227257f..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90
+++ /dev/null
@@ -1,13 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Unhandled clause REDUCTION in TASKLOOP construct
-subroutine omp_taskloop_reduction()
-   integer x
-   x = 0
-   !$omp taskloop reduction(+:x)
-   do i = 1, 100
-      x = x + 1
-   end do
-   !$omp end taskloop
-end subroutine omp_taskloop_reduction
diff --git a/flang/test/Lower/OpenMP/taskloop-reduction.f90 b/flang/test/Lower/OpenMP/taskloop-reduction.f90
new file mode 100644
index 0000000000000..4185a927366e7
--- /dev/null
+++ b/flang/test/Lower/OpenMP/taskloop-reduction.f90
@@ -0,0 +1,30 @@
+! This test checks the lowering of the reduction clause in the taskloop construct
+! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+! RUN %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=45 -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-VERSION
+
+! CHECK-VERSION: error: REDUCTION clause is not allowed on directive TASKLOOP in OpenMP v4.5, try -fopenmp-version=50
+
+! CHECK-LABEL: omp.private
+! CHECK-SAME: {type = private} @[[I_PRIVATE:.*]] : i32
+
+! CHECK-LABEL: func.func @_QPtest_reduction()
+! CHECK: %[[ALLOCA_A:.*]] = fir.alloca !fir.array<10xi32> {bindc_name = "a", uniq_name = "_QFtest_reductionEa"}
+! CHECK: %[[DECLARE_A:.*]]:2 = hlfir.declare %[[ALLOCA_A]](%2) {uniq_name = "_QFtest_reductionEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_reductionEi"}
+! CHECK: %[[DECLARE_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFtest_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[ALLOCA_SUM_I:.*]] = fir.alloca i32 {bindc_name = "sum_i", uniq_name = "_QFtest_reductionEsum_i"}
+! CHECK: %[[DECLARE_SUM_I:.*]]:2 = hlfir.declare %[[ALLOCA_SUM_I]] {uniq_name = "_QFtest_reductionEsum_i"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+subroutine test_reduction
+    integer :: i, a(10), sum_i
+
+    ! CHECK: omp.taskloop
+    ! CHECK-SAME: private(@[[I_PRIVATE]] %[[DECLARE_I]]#0 -> %arg0 : !fir.ref<i32>) reduction(@add_reduction_i32 %[[DECLARE_SUM_I]]#0 -> %arg1 : !fir.ref<i32>) {
+    !$omp taskloop reduction (+:sum_i)
+    do i = 1,10
+        sum_i = sum_i + i
+    end do
+    !$omp end taskloop
+
+end subroutine
\ No newline at end of file
diff --git a/flang/test/Semantics/coarrays02.f90 b/flang/test/Semantics/coarrays02.f90
index b16e0ccb58797..e866dd89c07ab 100644
--- a/flang/test/Semantics/coarrays02.f90
+++ b/flang/test/Semantics/coarrays02.f90
@@ -16,6 +16,8 @@ program main
   type(event_type) event
   !ERROR: Variable 'lock' with EVENT_TYPE or LOCK_TYPE must be a coarray
   type(lock_type) lock
+  !ERROR: Variable 'notify' with NOTIFY_TYPE must be a coarray
+  type(notify_type) notify
   integer :: local[*] ! ok in main
 end
 
@@ -120,3 +122,18 @@ subroutine s4
   !ERROR: Subscripts must appear in a coindexed reference when its base is an array
   print *, ta(1)%a[1]
 end
+
+subroutine s5(a, notify, res)
+  use iso_fortran_env
+  type t
+    type(notify_type) :: a
+  end type
+  real, intent(in) :: a[*]
+  type(event_type), intent(in) :: notify[*]
+  !ERROR: An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE
+  type(notify_type), intent(out) :: res[*]
+  !ERROR: Variable 'bad' with NOTIFY_TYPE potential component '%a' must be a coarray
+  type(t) :: bad
+  !ERROR: NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV
+  print *, a[1, NOTIFY=notify]
+end
diff --git a/flang/test/Semantics/notifywait03.f90 b/flang/test/Semantics/notifywait03.f90
index 0fc56f66ad32d..a336a7a67669a 100644
--- a/flang/test/Semantics/notifywait03.f90
+++ b/flang/test/Semantics/notifywait03.f90
@@ -10,6 +10,7 @@ program test_notify_wait
   implicit none
 
   ! notify_type variables must be coarrays
+  !ERROR: Variable 'non_coarray' with NOTIFY_TYPE must be a coarray
   type(notify_type) :: non_coarray
 
   type(notify_type) :: notify_var[*], notify_array(2)[*]
diff --git a/libc/shared/math.h b/libc/shared/math.h
index bd6aee73c3933..282dd6243d6a7 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -51,6 +51,7 @@
 #include "math/exp2f.h"
 #include "math/exp2f16.h"
 #include "math/exp2m1f.h"
+#include "math/exp2m1f16.h"
 #include "math/expf.h"
 #include "math/expf16.h"
 #include "math/frexpf.h"
diff --git a/libc/shared/math/exp2m1f16.h b/libc/shared/math/exp2m1f16.h
new file mode 100644
index 0000000000000..96a404708be18
--- /dev/null
+++ b/libc/shared/math/exp2m1f16.h
@@ -0,0 +1,29 @@
+//===-- Shared exp2m1f16 function -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
+#define LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/exp2m1f16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp2m1f16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 620900028d424..ddc0159b10ce4 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -769,6 +769,24 @@ add_header_library(
     libc.src.__support.macros.properties.cpu_features
 )
 
+add_header_library(
+  exp2m1f16
+  HDRS
+    exp2m1f16.h
+  DEPENDS
+    .expxf16_utils
+    libc.src.__support.common
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+)
+
 add_header_library(
   exp10
   HDRS
diff --git a/libc/src/__support/math/exp2m1f16.h b/libc/src/__support/math/exp2m1f16.h
new file mode 100644
index 0000000000000..0424af4aa953d
--- /dev/null
+++ b/libc/src/__support/math/exp2m1f16.h
@@ -0,0 +1,180 @@
+//===-- Implementation header for exp2m1f16 ----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/cpu_features.h"
+#include "src/__support/math/expxf16_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 exp2m1f16(float16 x) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ)
+      {0x0b3dU, 0x0904U, 1U, 0U, 1U},
+      // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ)
+      {0x0d3fU, 0x0b45U, 1U, 0U, 1U},
+      // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ)
+      {0x118cU, 0x0fb1U, 1U, 0U, 0U},
+      // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ)
+      {0x21bcU, 0x1ffaU, 1U, 0U, 1U},
+      // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ)
+      {0x9718U, 0x94eaU, 0U, 1U, 0U},
+      // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ)
+      {0x973fU, 0x9505U, 0U, 1U, 0U},
+  }};
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+  constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6;
+#else
+  constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7;
+#endif
+
+  constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
+      EXP2M1F16_EXCEPTS_HI = {{
+          // (input, RZ output, RU offset, RD offset, RN offset)
+          // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
+          {0x3396U, 0x31b7U, 1U, 0U, 0U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
+          {0x34baU, 0x3345U, 1U, 0U, 0U},
+#endif
+          // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
+          {0x36b6U, 0x3566U, 1U, 0U, 0U},
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
+          {0x37b7U, 0x3659U, 1U, 0U, 1U},
+#endif
+          // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ)
+          {0xb201U, 0xafcdU, 0U, 1U, 1U},
+          // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ)
+          {0xb3ccU, 0xb0f9U, 0U, 1U, 0U},
+          // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
+          {0xb8a5U, 0xb54cU, 0U, 1U, 1U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
+          {0xba8dU, 0xb6edU, 0U, 1U, 1U},
+#endif
+      }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  using namespace math::expxf16_internal;
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // When |x| <= 2^(-3), or |x| >= 11, or x is NaN.
+  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) {
+    // exp2m1(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // When x >= 16.
+    if (x_u >= 0x4c00 && x_bits.is_pos()) {
+      // exp2m1(+inf) = +inf
+      if (x_bits.is_inf())
+        return FPBits::inf().get_val();
+
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_UPWARD:
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
+        return FPBits::inf().get_val();
+      default:
+        return FPBits::max_normal().get_val();
+      }
+    }
+
+    // When x < -11.
+    if (x_u > 0xc980U) {
+      // exp2m1(-inf) = -1
+      if (x_bits.is_inf())
+        return FPBits::one(Sign::NEG).get_val();
+
+      // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1.
+      if (x_u < 0xca00U)
+        return fputil::round_result_slightly_down(
+            fputil::cast<float16>(-0x1.ffcp-1));
+
+      // When x <= -12, round(2^x - 1, HP, RN) = -1.
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_DOWNWARD:
+        return FPBits::one(Sign::NEG).get_val();
+      default:
+        return fputil::cast<float16>(-0x1.ffcp-1);
+      }
+    }
+
+    // When |x| <= 2^(-3).
+    if (x_abs <= 0x3000U) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u);
+          LIBC_UNLIKELY(r.has_value()))
+        return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+      float xf = x;
+      // Degree-5 minimax polynomial generated by Sollya with the following
+      // commands:
+      //   > display = hexadecimal;
+      //   > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
+      //   > x * P;
+      return fputil::cast<float16>(
+          xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f,
+                                0x1.c6af88p-5f, 0x1.3b45d6p-7f,
+                                0x1.641e7cp-10f));
+    }
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // exp2(x) = exp2(hi + mid) * exp2(lo)
+  auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
+  // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1
+  return fputil::cast<float16>(
+      fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c048a64db6bc2..e71300536616b 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1498,19 +1498,7 @@ add_entrypoint_object(
   HDRS
     ../exp2m1f16.h
   DEPENDS
-    libc.hdr.errno_macros
-    libc.hdr.fenv_macros
-    libc.src.__support.common
-    libc.src.__support.FPUtil.cast
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.cpu_features
-    libc.src.__support.math.expxf16_utils
+    libc.src.__support.math.exp2m1f16
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/exp2m1f16.cpp b/libc/src/math/generic/exp2m1f16.cpp
index ce0cc60748f19..497a2887cea4c 100644
--- a/libc/src/math/generic/exp2m1f16.cpp
+++ b/libc/src/math/generic/exp2m1f16.cpp
@@ -7,163 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/exp2m1f16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
-#include "src/__support/macros/properties/cpu_features.h"
-#include "src/__support/math/expxf16_utils.h"
+#include "src/__support/math/exp2m1f16.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{
-    // (input, RZ output, RU offset, RD offset, RN offset)
-    // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ)
-    {0x0b3dU, 0x0904U, 1U, 0U, 1U},
-    // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ)
-    {0x0d3fU, 0x0b45U, 1U, 0U, 1U},
-    // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ)
-    {0x118cU, 0x0fb1U, 1U, 0U, 0U},
-    // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ)
-    {0x21bcU, 0x1ffaU, 1U, 0U, 1U},
-    // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ)
-    {0x9718U, 0x94eaU, 0U, 1U, 0U},
-    // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ)
-    {0x973fU, 0x9505U, 0U, 1U, 0U},
-}};
-
-#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6;
-#else
-static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7;
-#endif
-
-static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
-    EXP2M1F16_EXCEPTS_HI = {{
-        // (input, RZ output, RU offset, RD offset, RN offset)
-        // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
-        {0x3396U, 0x31b7U, 1U, 0U, 0U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
-        {0x34baU, 0x3345U, 1U, 0U, 0U},
-#endif
-        // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
-        {0x36b6U, 0x3566U, 1U, 0U, 0U},
-#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
-        {0x37b7U, 0x3659U, 1U, 0U, 1U},
-#endif
-        // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ)
-        {0xb201U, 0xafcdU, 0U, 1U, 1U},
-        // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ)
-        {0xb3ccU, 0xb0f9U, 0U, 1U, 0U},
-        // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
-        {0xb8a5U, 0xb54cU, 0U, 1U, 1U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
-        {0xba8dU, 0xb6edU, 0U, 1U, 1U},
-#endif
-    }};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
 LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) {
-  using namespace math::expxf16_internal;
-  using FPBits = fputil::FPBits<float16>;
-  FPBits x_bits(x);
-
-  uint16_t x_u = x_bits.uintval();
-  uint16_t x_abs = x_u & 0x7fffU;
-
-  // When |x| <= 2^(-3), or |x| >= 11, or x is NaN.
-  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) {
-    // exp2m1(NaN) = NaN
-    if (x_bits.is_nan()) {
-      if (x_bits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // When x >= 16.
-    if (x_u >= 0x4c00 && x_bits.is_pos()) {
-      // exp2m1(+inf) = +inf
-      if (x_bits.is_inf())
-        return FPBits::inf().get_val();
-
-      switch (fputil::quick_get_round()) {
-      case FE_TONEAREST:
-      case FE_UPWARD:
-        fputil::set_errno_if_required(ERANGE);
-        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
-        return FPBits::inf().get_val();
-      default:
-        return FPBits::max_normal().get_val();
-      }
-    }
-
-    // When x < -11.
-    if (x_u > 0xc980U) {
-      // exp2m1(-inf) = -1
-      if (x_bits.is_inf())
-        return FPBits::one(Sign::NEG).get_val();
-
-      // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1.
-      if (x_u < 0xca00U)
-        return fputil::round_result_slightly_down(
-            fputil::cast<float16>(-0x1.ffcp-1));
-
-      // When x <= -12, round(2^x - 1, HP, RN) = -1.
-      switch (fputil::quick_get_round()) {
-      case FE_TONEAREST:
-      case FE_DOWNWARD:
-        return FPBits::one(Sign::NEG).get_val();
-      default:
-        return fputil::cast<float16>(-0x1.ffcp-1);
-      }
-    }
-
-    // When |x| <= 2^(-3).
-    if (x_abs <= 0x3000U) {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-      if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u);
-          LIBC_UNLIKELY(r.has_value()))
-        return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-      float xf = x;
-      // Degree-5 minimax polynomial generated by Sollya with the following
-      // commands:
-      //   > display = hexadecimal;
-      //   > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
-      //   > x * P;
-      return fputil::cast<float16>(
-          xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f,
-                                0x1.c6af88p-5f, 0x1.3b45d6p-7f,
-                                0x1.641e7cp-10f));
-    }
-  }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // exp2(x) = exp2(hi + mid) * exp2(lo)
-  auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
-  // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1
-  return fputil::cast<float16>(
-      fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f));
+  return math::exp2m1f16(x);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/UnitTest/FEnvSafeTest.cpp b/libc/test/UnitTest/FEnvSafeTest.cpp
index 4393f9d5e5c3b..64f50d7be7fe3 100644
--- a/libc/test/UnitTest/FEnvSafeTest.cpp
+++ b/libc/test/UnitTest/FEnvSafeTest.cpp
@@ -43,7 +43,8 @@ void FEnvSafeTest::set_fenv(const fenv_t &fenv) {
 
 void FEnvSafeTest::expect_fenv_eq(const fenv_t &before_fenv,
                                   const fenv_t &after_fenv) {
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC) && \
+    defined(__ARM_FP)
   using FPState = LIBC_NAMESPACE::fputil::FEnv::FPState;
   const FPState &before_state = reinterpret_cast<const FPState &>(before_fenv);
   const FPState &after_state = reinterpret_cast<const FPState &>(after_fenv);
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index aede395350821..762b5b0417ef6 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -44,6 +44,7 @@ add_fp_unittest(
     libc.src.__support.math.exp2f
     libc.src.__support.math.exp2f16
     libc.src.__support.math.exp2m1f
+    libc.src.__support.math.exp2m1f16
     libc.src.__support.math.exp10
     libc.src.__support.math.exp10f
     libc.src.__support.math.exp10f16
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index a6825a10654c9..5b409781a5b07 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -29,6 +29,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16));
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp10m1f16(0.0f16));
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp2f16(0.0f16));
+  EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp2m1f16(0.0f16));
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16));
 
   ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5));
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 78f63b4406eb0..b229659415d55 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3620,10 +3620,7 @@ class LLVM_ABI TargetLoweringBase {
     return RTLIB::RuntimeLibcallsInfo::getLibcallImplName(Call);
   }
 
-  const char *getMemcpyName() const {
-    // FIXME: Return StringRef
-    return Libcalls.getMemcpyName().data();
-  }
+  RTLIB::LibcallImpl getMemcpyImpl() const { return Libcalls.getMemcpyImpl(); }
 
   /// Check if this is valid libcall for the current module, otherwise
   /// RTLIB::Unsupported.
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 155cfe8dd3a98..711aa70a4a8d3 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -708,7 +708,7 @@ struct DEMANGLE_ABI SpecialTableSymbolNode : public SymbolNode {
     return N->kind() == NodeKind::SpecialTableSymbol;
   }
 
-  QualifiedNameNode *TargetName = nullptr;
+  NodeArrayNode *TargetNames = nullptr;
   Qualifiers Quals = Qualifiers::Q_None;
 };
 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 208609f64f418..0afae8a013bd6 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -1284,17 +1284,16 @@ def OMP_TaskGroup : Directive<[Spelling<"taskgroup">]> {
   let category = CA_Executable;
 }
 def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> {
-  let allowedClauses = [
-    VersionedClause<OMPC_Allocate>,
-    VersionedClause<OMPC_FirstPrivate>,
-    VersionedClause<OMPC_InReduction>,
-    VersionedClause<OMPC_LastPrivate>,
-    VersionedClause<OMPC_Mergeable>,
-    VersionedClause<OMPC_NoGroup>,
-    VersionedClause<OMPC_Private>,
-    VersionedClause<OMPC_Reduction>,
-    VersionedClause<OMPC_Shared>,
-    VersionedClause<OMPC_Untied>,
+  let allowedClauses = [VersionedClause<OMPC_Allocate>,
+                        VersionedClause<OMPC_FirstPrivate>,
+                        VersionedClause<OMPC_InReduction>,
+                        VersionedClause<OMPC_LastPrivate>,
+                        VersionedClause<OMPC_Mergeable>,
+                        VersionedClause<OMPC_NoGroup>,
+                        VersionedClause<OMPC_Private>,
+                        VersionedClause<OMPC_Reduction, 50>,
+                        VersionedClause<OMPC_Shared>,
+                        VersionedClause<OMPC_Untied>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 84026aa9d3624..1c46965d995fe 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -1192,4 +1192,42 @@ def int_loongarch_lasx_xvstelm_w
 def int_loongarch_lasx_xvstelm_d
   : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
+// LASX and LSX conversion
+def int_loongarch_lasx_cast_128_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128
+  : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128
+  : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_s
+  : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_d
+  : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo
+  : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_s
+  : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_d
+  : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi
+  : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo
+  : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_s
+  : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_d
+  : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi
+  : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 } // TargetPrefix = "loongarch"
diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index a0876b169e0b8..a7bcbf010d1bf 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -194,10 +194,11 @@ LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I,
 /// Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch
 /// weights in the new instruction if the parent function of the original
 /// instruction has an entry count. This is to not confuse users by injecting
-/// profile data into non-profiled functions.
-LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I,
-                                                          Function &F,
-                                                          StringRef PassName);
+/// profile data into non-profiled functions. If \p F is nullptr, we will fetch
+/// the function from \p I.
+LLVM_ABI void
+setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName,
+                                            const Function *F = nullptr);
 
 /// Analogous to setExplicitlyUnknownBranchWeights, but for functions and their
 /// entry counts.
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index ab14ed44fed52..bae760b3f981d 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -134,13 +134,13 @@ struct RuntimeLibcallsInfo {
 
   /// Return a function name compatible with RTLIB::MEMCPY, or nullptr if fully
   /// unsupported.
-  StringRef getMemcpyName() const {
+  RTLIB::LibcallImpl getMemcpyImpl() const {
     RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY);
     if (Memcpy != RTLIB::Unsupported)
-      return getLibcallImplName(Memcpy);
+      return Memcpy;
 
     // Fallback to memmove if memcpy isn't available.
-    return getLibcallName(RTLIB::MEMMOVE);
+    return getLibcallImpl(RTLIB::MEMMOVE);
   }
 
   bool isAvailable(RTLIB::LibcallImpl Impl) const {
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 6412949948c07..0b55c03a46747 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1301,7 +1301,7 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop(
   // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
   // hard to predict precise branch weigths we mark the branch as "unknown"
   // (50/50) to prevent misleading optimizations.
-  setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE);
+  setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
   return Loaded;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 52c43a4ac4a04..d02f097fef829 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -776,7 +776,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
     break;
   case TargetOpcode::G_MEMCPY:
     RTLibcall = RTLIB::MEMCPY;
-    Name = TLI.getMemcpyName();
+    Name = TLI.getLibcallImplName(TLI.getMemcpyImpl()).data();
     Args[0].Flags[0].setReturned();
     break;
   case TargetOpcode::G_MEMMOVE:
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 88d81993fbe55..f4c1a8bf339c8 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -83,15 +83,21 @@ llvm::createUnpackMachineBundles(
   return new UnpackMachineBundles(std::move(Ftor));
 }
 
-/// Return the first found DebugLoc that has a DILocation, given a range of
-/// instructions. The search range is from FirstMI to LastMI (exclusive). If no
-/// DILocation is found, then an empty location is returned.
+/// Return the first DebugLoc that has line number information, given a
+/// range of instructions. The search range is from FirstMI to LastMI
+/// (exclusive). Otherwise return the first DILocation or an empty location if
+/// there are none.
 static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
                             MachineBasicBlock::instr_iterator LastMI) {
-  for (auto MII = FirstMI; MII != LastMI; ++MII)
-    if (MII->getDebugLoc())
-      return MII->getDebugLoc();
-  return DebugLoc();
+  DebugLoc DL;
+  for (auto MII = FirstMI; MII != LastMI; ++MII) {
+    if (DebugLoc MIIDL = MII->getDebugLoc()) {
+      if (MIIDL.getLine() != 0)
+        return MIIDL;
+      DL = MIIDL.get();
+    }
+  }
+  return DL;
 }
 
 /// Check if target reg is contained in given lists, which are:
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 620d3d3d02daa..d738dc4eea36d 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -244,7 +244,7 @@ static bool canEmitMemcpy(const TargetMachine *TM, Function *F) {
   if (!TM)
     return true;
   const TargetLowering *TLI = TM->getSubtargetImpl(*F)->getTargetLowering();
-  return TLI->getMemcpyName() != nullptr;
+  return TLI->getMemcpyImpl() != RTLIB::Unsupported;
 }
 
 // Return a value appropriate for use with the memset_pattern16 libcall, if
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 379242ec5a157..ff6a7b2cf5462 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9257,21 +9257,22 @@ SDValue SelectionDAG::getMemcpy(
   // FIXME: pass in SDLoc
   TargetLowering::CallLoweringInfo CLI(*this);
   bool IsTailCall = false;
-  const char *MemCpyName = TLI->getMemcpyName();
+  RTLIB::LibcallImpl MemCpyImpl = TLI->getMemcpyImpl();
 
   if (OverrideTailCall.has_value()) {
     IsTailCall = *OverrideTailCall;
   } else {
-    bool LowersToMemcpy = StringRef(MemCpyName) == StringRef("memcpy");
+    bool LowersToMemcpy = MemCpyImpl == RTLIB::impl_memcpy;
     IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemcpy);
   }
 
   CLI.setDebugLoc(dl)
       .setChain(Chain)
       .setLibCallee(
-          TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+          TLI->getLibcallImplCallingConv(MemCpyImpl),
           Dst.getValueType().getTypeForEVT(*getContext()),
-          getExternalSymbol(MemCpyName, TLI->getPointerTy(getDataLayout())),
+          getExternalSymbol(TLI->getLibcallImplName(MemCpyImpl).data(),
+                            TLI->getPointerTy(getDataLayout())),
           std::move(Args))
       .setDiscardResult()
       .setTailCall(IsTailCall);
@@ -9361,22 +9362,24 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // FIXME:  pass in SDLoc
   TargetLowering::CallLoweringInfo CLI(*this);
 
+  RTLIB::LibcallImpl MemmoveImpl = TLI->getLibcallImpl(RTLIB::MEMMOVE);
+
   bool IsTailCall = false;
   if (OverrideTailCall.has_value()) {
     IsTailCall = *OverrideTailCall;
   } else {
-    bool LowersToMemmove =
-        TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove");
+    bool LowersToMemmove = MemmoveImpl == RTLIB::impl_memmove;
     IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemmove);
   }
 
   CLI.setDebugLoc(dl)
       .setChain(Chain)
-      .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
-                    Dst.getValueType().getTypeForEVT(*getContext()),
-                    getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
-                                      TLI->getPointerTy(getDataLayout())),
-                    std::move(Args))
+      .setLibCallee(
+          TLI->getLibcallImplCallingConv(MemmoveImpl),
+          Dst.getValueType().getTypeForEVT(*getContext()),
+          getExternalSymbol(TLI->getLibcallImplName(MemmoveImpl).data(),
+                            TLI->getPointerTy(getDataLayout())),
+          std::move(Args))
       .setDiscardResult()
       .setTailCall(IsTailCall);
 
@@ -9492,8 +9495,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                        TLI->getPointerTy(DL)),
                      std::move(Args));
   }
-  bool LowersToMemset =
-      TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset");
+
+  RTLIB::LibcallImpl MemsetImpl = TLI->getLibcallImpl(RTLIB::MEMSET);
+  bool LowersToMemset = MemsetImpl == RTLIB::impl_memset;
+
   // If we're going to use bzero, make sure not to tail call unless the
   // subsequent return doesn't need a value, as bzero doesn't return the first
   // arg unlike memset.
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index b22928be3be50..250d382998982 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -15,6 +15,8 @@
 
 #include "llvm/Demangle/MicrosoftDemangle.h"
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
@@ -277,6 +279,15 @@ demanglePointerCVQualifiers(std::string_view &MangledName) {
   DEMANGLE_UNREACHABLE;
 }
 
+static NodeArrayNode *smallVecToNodeArray(ArenaAllocator &Arena,
+                                          ArrayRef<Node *> Vec) {
+  NodeArrayNode *Arr = Arena.alloc<NodeArrayNode>();
+  Arr->Count = Vec.size();
+  Arr->Nodes = Arena.allocArray<Node *>(Vec.size());
+  std::memcpy(Arr->Nodes, Vec.data(), Vec.size() * sizeof(Node *));
+  return Arr;
+}
+
 std::string_view Demangler::copyString(std::string_view Borrowed) {
   char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
   // This is not a micro-optimization, it avoids UB, should Borrowed be an null
@@ -323,8 +334,19 @@ Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName,
   }
 
   std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
-  if (!consumeFront(MangledName, '@'))
-    STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
+
+  SmallVector<Node *, 1> TargetNames;
+  while (!consumeFront(MangledName, '@')) {
+    QualifiedNameNode *QN = demangleFullyQualifiedTypeName(MangledName);
+    if (Error)
+      return nullptr;
+    assert(QN);
+    TargetNames.push_back(QN);
+  }
+
+  if (!TargetNames.empty())
+    STSN->TargetNames = smallVecToNodeArray(Arena, TargetNames);
+
   return STSN;
 }
 
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 61e4961c714bc..17c6aab500049 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -662,9 +662,9 @@ void VcallThunkIdentifierNode::output(OutputBuffer &OB,
 void SpecialTableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
   outputQualifiers(OB, Quals, false, true);
   Name->output(OB, Flags);
-  if (TargetName) {
+  if (TargetNames) {
     OB << "{for `";
-    TargetName->output(OB, Flags);
+    TargetNames->output(OB, Flags, "'s `");
     OB << "'}";
   }
 }
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 88dbd176e0d3f..95edb2e8e56d8 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -1019,8 +1019,7 @@ Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True,
                                                      const Twine &Name) {
   Value *Ret = CreateSelectFMF(C, True, False, {}, Name);
   if (auto *SI = dyn_cast<SelectInst>(Ret)) {
-    setExplicitlyUnknownBranchWeightsIfProfiled(
-        *SI, *SI->getParent()->getParent(), PassName);
+    setExplicitlyUnknownBranchWeightsIfProfiled(*SI, PassName);
   }
   return Ret;
 }
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index fc2be5188f456..94dbe1f3988b8 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -274,9 +274,12 @@ void llvm::setExplicitlyUnknownBranchWeights(Instruction &I,
 }
 
 void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I,
-                                                       Function &F,
-                                                       StringRef PassName) {
-  if (std::optional<Function::ProfileCount> EC = F.getEntryCount();
+                                                       StringRef PassName,
+                                                       const Function *F) {
+  F = F ? F : I.getFunction();
+  assert(F && "Either pass a instruction attached to a Function, or explicitly "
+              "pass the Function that it will be attached to");
+  if (std::optional<Function::ProfileCount> EC = F->getEntryCount();
       EC && EC->getCount() > 0)
     setExplicitlyUnknownBranchWeights(I, PassName);
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index fe700e17d341b..cf4ffc82f6009 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6630,6 +6630,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
                          N->getOperand(1));
     break;
+  case Intrinsic::loongarch_lasx_concat_128_s:
+  case Intrinsic::loongarch_lasx_concat_128_d:
+  case Intrinsic::loongarch_lasx_concat_128:
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index b502b056c4cdf..00d52870f1727 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2113,6 +2113,37 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64,  2,  sub_128>;
 defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8,  sub_128>;
 defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8,  16, sub_128>;
 
+// LASX and LSX conversion
+def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 0ff178e1f1959..e9088a4d9275c 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(RISCVCodeGen
   RISCVMoveMerger.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp
+  RISCVPromoteConstant.cpp
   RISCVPushPopOptimizer.cpp
   RISCVRedundantCopyElimination.cpp
   RISCVRegisterInfo.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ae9410193efe1..51e8e8574ed15 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -20,6 +20,7 @@
 namespace llvm {
 class FunctionPass;
 class InstructionSelector;
+class ModulePass;
 class PassRegistry;
 class RISCVRegisterBankInfo;
 class RISCVSubtarget;
@@ -111,6 +112,9 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &);
 FunctionPass *createRISCVPreLegalizerCombiner();
 void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);
 
+ModulePass *createRISCVPromoteConstantPass();
+void initializeRISCVPromoteConstantPass(PassRegistry &);
+
 FunctionPass *createRISCVVLOptimizerPass();
 void initializeRISCVVLOptimizerPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp
new file mode 100644
index 0000000000000..bf1f69f8e8d93
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp
@@ -0,0 +1,213 @@
+//==- RISCVPromoteConstant.cpp - Promote constant fp to global for RISC-V --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-promote-const"
+#define RISCV_PROMOTE_CONSTANT_NAME "RISC-V Promote Constants"
+
+STATISTIC(NumPromoted, "Number of constant literals promoted to globals");
+STATISTIC(NumPromotedUses, "Number of uses of promoted literal constants");
+
+namespace {
+
+class RISCVPromoteConstant : public ModulePass {
+public:
+  static char ID;
+  RISCVPromoteConstant() : ModulePass(ID) {}
+
+  StringRef getPassName() const override { return RISCV_PROMOTE_CONSTANT_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetPassConfig>();
+    AU.setPreservesCFG();
+  }
+
+  /// Iterate over the functions and promote the double fp constants that
+  /// would otherwise go into the constant pool to a constant array.
+  bool runOnModule(Module &M) override {
+    if (skipModule(M))
+      return false;
+    // TargetMachine and Subtarget are needed to query isFPImmlegal.
+    const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+    const TargetMachine &TM = TPC.getTM<TargetMachine>();
+    bool Changed = false;
+    for (Function &F : M) {
+      const RISCVSubtarget &ST = TM.getSubtarget<RISCVSubtarget>(F);
+      const RISCVTargetLowering *TLI = ST.getTargetLowering();
+      Changed |= runOnFunction(F, TLI);
+    }
+    return Changed;
+  }
+
+private:
+  bool runOnFunction(Function &F, const RISCVTargetLowering *TLI);
+};
+} // end anonymous namespace
+
+char RISCVPromoteConstant::ID = 0;
+
+INITIALIZE_PASS(RISCVPromoteConstant, DEBUG_TYPE, RISCV_PROMOTE_CONSTANT_NAME,
+                false, false)
+
+ModulePass *llvm::createRISCVPromoteConstantPass() {
+  return new RISCVPromoteConstant();
+}
+
+bool RISCVPromoteConstant::runOnFunction(Function &F,
+                                         const RISCVTargetLowering *TLI) {
+  if (F.hasOptNone() || F.hasOptSize())
+    return false;
+
+  // Bail out and make no transformation if the target doesn't support
+  // doubles, or if we're not targeting RV64 as we currently see some
+  // regressions for those targets.
+  if (!TLI->isTypeLegal(MVT::f64) || !TLI->isTypeLegal(MVT::i64))
+    return false;
+
+  // Collect all unique double constants and their uses in the function. Use
+  // MapVector to preserve insertion order.
+  MapVector<ConstantFP *, SmallVector<Use *, 8>> ConstUsesMap;
+
+  for (Instruction &I : instructions(F)) {
+    for (Use &U : I.operands()) {
+      auto *C = dyn_cast<ConstantFP>(U.get());
+      if (!C || !C->getType()->isDoubleTy())
+        continue;
+      // Do not promote if it wouldn't be loaded from the constant pool.
+      if (TLI->isFPImmLegal(C->getValueAPF(), MVT::f64,
+                            /*ForCodeSize=*/false))
+        continue;
+      // Do not promote a constant if it is used as an immediate argument
+      // for an intrinsic.
+      if (auto *II = dyn_cast<IntrinsicInst>(U.getUser())) {
+        Function *IntrinsicFunc = II->getFunction();
+        unsigned OperandIdx = U.getOperandNo();
+        if (IntrinsicFunc && IntrinsicFunc->getAttributes().hasParamAttr(
+                                 OperandIdx, Attribute::ImmArg)) {
+          LLVM_DEBUG(dbgs() << "Skipping promotion of constant in: " << *II
+                            << " because operand " << OperandIdx
+                            << " must be an immediate.\n");
+          continue;
+        }
+      }
+      // Note: FP args to inline asm would be problematic if we had a
+      // constraint that required an immediate floating point operand. At the
+      // time of writing LLVM doesn't recognise such a constraint.
+      ConstUsesMap[C].push_back(&U);
+    }
+  }
+
+  int PromotableConstants = ConstUsesMap.size();
+  LLVM_DEBUG(dbgs() << "Found " << PromotableConstants
+                    << " promotable constants in " << F.getName() << "\n");
+  // Bail out if no promotable constants found, or if only one is found.
+  if (PromotableConstants < 2) {
+    LLVM_DEBUG(dbgs() << "Performing no promotions as insufficient promotable "
+                         "constants found\n");
+    return false;
+  }
+
+  NumPromoted += PromotableConstants;
+
+  // Create a global array containing the promoted constants.
+  Module *M = F.getParent();
+  Type *DoubleTy = Type::getDoubleTy(M->getContext());
+
+  SmallVector<Constant *, 16> ConstantVector;
+  for (auto const &Pair : ConstUsesMap)
+    ConstantVector.push_back(Pair.first);
+
+  ArrayType *ArrayTy = ArrayType::get(DoubleTy, ConstantVector.size());
+  Constant *GlobalArrayInitializer =
+      ConstantArray::get(ArrayTy, ConstantVector);
+
+  auto *GlobalArray = new GlobalVariable(
+      *M, ArrayTy,
+      /*isConstant=*/true, GlobalValue::InternalLinkage, GlobalArrayInitializer,
+      ".promoted_doubles." + F.getName());
+
+  // A cache to hold the loaded value for a given constant within a basic block.
+  DenseMap<std::pair<ConstantFP *, BasicBlock *>, Value *> LocalLoads;
+
+  // Replace all uses with the loaded value.
+  unsigned Idx = 0;
+  for (auto const &Pair : ConstUsesMap) {
+    ConstantFP *Const = Pair.first;
+    const SmallVector<Use *, 8> &Uses = Pair.second;
+
+    for (Use *U : Uses) {
+      Instruction *UserInst = cast<Instruction>(U->getUser());
+      BasicBlock *InsertionBB;
+
+      // If the user is a PHI node, we must insert the load in the
+      // corresponding predecessor basic block. Otherwise, it's inserted into
+      // the same block as the use.
+      if (auto *PN = dyn_cast<PHINode>(UserInst))
+        InsertionBB = PN->getIncomingBlock(*U);
+      else
+        InsertionBB = UserInst->getParent();
+
+      if (isa<CatchSwitchInst>(InsertionBB->getTerminator())) {
+        LLVM_DEBUG(dbgs() << "Bailing out: catchswitch means thre is no valid "
+                             "insertion point.\n");
+        return false;
+      }
+
+      auto CacheKey = std::make_pair(Const, InsertionBB);
+      Value *LoadedVal = nullptr;
+
+      // Re-use a load if it exists in the insertion block.
+      if (LocalLoads.count(CacheKey)) {
+        LoadedVal = LocalLoads.at(CacheKey);
+      } else {
+        // Otherwise, create a new GEP and Load at the correct insertion point.
+        // It is always safe to insert in the first insertion point in the BB,
+        // so do that and let other passes reorder.
+        IRBuilder<> Builder(InsertionBB, InsertionBB->getFirstInsertionPt());
+        Value *ElementPtr = Builder.CreateConstInBoundsGEP2_64(
+            GlobalArray->getValueType(), GlobalArray, 0, Idx, "double.addr");
+        LoadedVal = Builder.CreateLoad(DoubleTy, ElementPtr, "double.val");
+
+        // Cache the newly created load for this block.
+        LocalLoads[CacheKey] = LoadedVal;
+      }
+
+      U->set(LoadedVal);
+      ++NumPromotedUses;
+    }
+    ++Idx;
+  }
+
+  return true;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index ae54ff1515121..16ef67da83128 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -139,6 +139,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVExpandAtomicPseudoPass(*PR);
   initializeRISCVRedundantCopyEliminationPass(*PR);
   initializeRISCVAsmPrinterPass(*PR);
+  initializeRISCVPromoteConstantPass(*PR);
 }
 
 static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
@@ -462,6 +463,8 @@ void RISCVPassConfig::addIRPasses() {
 }
 
 bool RISCVPassConfig::addPreISel() {
+  if (TM->getOptLevel() != CodeGenOptLevel::None)
+    addPass(createRISCVPromoteConstantPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None) {
     // Add a barrier before instruction selection so that we will not get
     // deleted block address after enabling default outlining. See D99707 for
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7a95df4b2a47c..b575d76e897d2 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1378,8 +1378,7 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
       IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
   // We can't know the precise weights here, as they would depend on the value
   // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
-  setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(),
-                                              DEBUG_TYPE);
+  setExplicitlyUnknownBranchWeightsIfProfiled(*SI, DEBUG_TYPE);
   Type *IndexTy = DL.getIndexType(Call->getType());
   SmallVector<DominatorTree::UpdateType, 8> Updates;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index d85e4f7590197..9bdd8cb71f7f3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -479,7 +479,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
                                      const Twine &NameStr = "",
                                      InsertPosition InsertBefore = nullptr) {
     auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr);
-    setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE);
+    setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, DEBUG_TYPE, &F);
     return Sel;
   }
 
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 0577ddbd2353c..0f3e66476f055 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -330,8 +330,7 @@ static void buildPartialUnswitchConditionalBranch(
       HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
                        : nullptr);
   if (!HasBranchWeights)
-    setExplicitlyUnknownBranchWeightsIfProfiled(
-        *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
+    setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
 }
 
 /// Copy a set of loop invariant values, and conditionally branch on them.
@@ -389,8 +388,7 @@ static void buildPartialInvariantUnswitchConditionalBranch(
       IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                        Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
   if (!ProfData)
-    setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(),
-                                                DEBUG_TYPE);
+    setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
 }
 
 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -3204,8 +3202,7 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
   auto *InvariantBr =
       Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
   // We don't know anything about the relation between the limits.
-  setExplicitlyUnknownBranchWeightsIfProfiled(
-      *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
+  setExplicitlyUnknownBranchWeightsIfProfiled(*InvariantBr, DEBUG_TYPE);
 
   Builder.SetInsertPoint(CheckBlock);
   Builder.CreateCondBr(
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3a3e3ade20212..9a8dbebe5bfba 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5214,8 +5214,7 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
     // We don't have any info about this condition.
     auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                              : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
-    setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(),
-                                                DEBUG_TYPE);
+    setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);
 
     OldTI->eraseFromParent();
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2588c878d8472..9e65399e75dc7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -167,8 +167,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
     if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
       return;
 
-    if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
-        Candidate->mayReadOrWriteMemory())
+    if (Candidate->getParent() == SinkTo || cannotHoistOrSinkRecipe(*Candidate))
       return;
 
     if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
new file mode 100644
index 0000000000000..006713ccabf47
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
@@ -0,0 +1,303 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
+
+define void @lasx_cast_128_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x float>, ptr %va
+  %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a)
+  store <8 x float> %b, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
+
+define void @lasx_cast_128_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x double>, ptr %va
+  %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a)
+  store <4 x double> %b, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
+
+define void @lasx_cast_128(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x i64>, ptr %va
+  %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a)
+  store <4 x i64> %b, ptr %vd
+  ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
+
+define void @lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x float>, ptr %va
+  %b = load <4 x float>, ptr %vb
+  %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b)
+  store <8 x float> %c, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
+
+define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x double>, ptr %va
+  %b = load <2 x double>, ptr %vb
+  %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b)
+  store <4 x double> %c, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)
+
+define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <2 x i64>, ptr %va
+  %b = load <2 x i64>, ptr %vb
+  %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b)
+  store <4 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
+
+define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a)
+  store <4 x float> %c, ptr %vd
+  ret void
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
+
+define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a)
+  store <2 x double> %c, ptr %vd
+  ret void
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
+
+define void @lasx_extract_128_lo(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a)
+  store <2 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
+
+define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a)
+  store <4 x float> %c, ptr %vd
+  ret void
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
+
+define void @lasx_extract_128_hi_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a)
+  store <2 x double> %c, ptr %vd
+  ret void
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
+
+define void @lasx_extract_128_hi(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a)
+  store <2 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
+
+define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %b = load <4 x float>, ptr %vb
+  %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b)
+  store <8 x float> %c, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
+
+define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %b = load <2 x double>, ptr %vb
+  %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b)
+  store <4 x double> %c, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)
+
+define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %b = load <2 x i64>, ptr %vb
+  %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b)
+  store <4 x i64> %c, ptr %vd
+  ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
+
+define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <8 x float>, ptr %va
+  %b = load <4 x float>, ptr %vb
+  %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b)
+  store <8 x float> %c, ptr %vd
+  ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
+
+define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x double>, ptr %va
+  %b = load <2 x double>, ptr %vb
+  %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b)
+  store <4 x double> %c, ptr %vd
+  ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
+
+define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %a = load <4 x i64>, ptr %va
+  %b = load <2 x i64>, ptr %vb
+  %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b)
+  store <4 x i64> %c, ptr %vd
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/vec_rounding.ll b/llvm/test/CodeGen/PowerPC/vec_rounding.ll
index 2f16a435440ff..438c8ebdc099e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_rounding.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_rounding.ll
@@ -1,172 +1,251 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
 
 ; Check vector round to single-precision toward -infinity (vrfim)
 ; instruction generation using Altivec.
 
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
 declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
 define <2 x double> @floor_v2f64(<2 x double> %p)
+; CHECK-LABEL: floor_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    frim 1, 1
+; CHECK-NEXT:    frim 2, 2
+; CHECK-NEXT:    blr
 {
   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK-LABEL: floor_v2f64:
-; CHECK: frim
-; CHECK: frim
 
 declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
 define <4 x double> @floor_v4f64(<4 x double> %p)
+; CHECK-LABEL: floor_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    frim 1, 1
+; CHECK-NEXT:    frim 2, 2
+; CHECK-NEXT:    frim 3, 3
+; CHECK-NEXT:    frim 4, 4
+; CHECK-NEXT:    blr
 {
   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK-LABEL: floor_v4f64:
-; CHECK: frim
-; CHECK: frim
-; CHECK: frim
-; CHECK: frim
 
 declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
 define <2 x double> @ceil_v2f64(<2 x double> %p)
+; CHECK-LABEL: ceil_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    frip 1, 1
+; CHECK-NEXT:    frip 2, 2
+; CHECK-NEXT:    blr
 {
   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK-LABEL: ceil_v2f64:
-; CHECK: frip
-; CHECK: frip
 
 declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
 define <4 x double> @ceil_v4f64(<4 x double> %p)
+; CHECK-LABEL: ceil_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    frip 1, 1
+; CHECK-NEXT:    frip 2, 2
+; CHECK-NEXT:    frip 3, 3
+; CHECK-NEXT:    frip 4, 4
+; CHECK-NEXT:    blr
 {
   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK-LABEL: ceil_v4f64:
-; CHECK: frip
-; CHECK: frip
-; CHECK: frip
-; CHECK: frip
 
 declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
 define <2 x double> @trunc_v2f64(<2 x double> %p)
+; CHECK-LABEL: trunc_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    friz 1, 1
+; CHECK-NEXT:    friz 2, 2
+; CHECK-NEXT:    blr
 {
   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK-LABEL: trunc_v2f64:
-; CHECK: friz
-; CHECK: friz
 
 declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
 define <4 x double> @trunc_v4f64(<4 x double> %p)
+; CHECK-LABEL: trunc_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    friz 1, 1
+; CHECK-NEXT:    friz 2, 2
+; CHECK-NEXT:    friz 3, 3
+; CHECK-NEXT:    friz 4, 4
+; CHECK-NEXT:    blr
 {
   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK-LABEL: trunc_v4f64:
-; CHECK: friz
-; CHECK: friz
-; CHECK: friz
-; CHECK: friz
 
 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
-define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+define <2 x double> @nearbyint_v2f64(<2 x double> %p) nounwind
+; CHECK-LABEL: nearbyint_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -128(1)
+; CHECK-NEXT:    std 0, 144(1)
+; CHECK-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT:    fmr 31, 2
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    fmr 30, 1
+; CHECK-NEXT:    fmr 1, 31
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    fmr 2, 1
+; CHECK-NEXT:    fmr 1, 30
+; CHECK-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 128
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
 {
   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK-LABEL: nearbyint_v2f64:
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
 
 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
-define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+define <4 x double> @nearbyint_v4f64(<4 x double> %p) nounwind
+; CHECK-LABEL: nearbyint_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -144(1)
+; CHECK-NEXT:    std 0, 160(1)
+; CHECK-NEXT:    stfd 28, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT:    fmr 29, 2
+; CHECK-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT:    fmr 30, 3
+; CHECK-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT:    fmr 31, 4
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    fmr 28, 1
+; CHECK-NEXT:    fmr 1, 29
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    fmr 29, 1
+; CHECK-NEXT:    fmr 1, 30
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    fmr 30, 1
+; CHECK-NEXT:    fmr 1, 31
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    fmr 4, 1
+; CHECK-NEXT:    fmr 1, 28
+; CHECK-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 28, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT:    fmr 2, 29
+; CHECK-NEXT:    fmr 3, 30
+; CHECK-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 144
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
 {
   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK-LABEL: nearbyint_v4f64:
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
-; CHECK: bl nearbyint
 
 
 declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
 define <4 x float> @floor_v4f32(<4 x float> %p)
+; CHECK-LABEL: floor_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfim 2, 2
+; CHECK-NEXT:    blr
 {
   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK-LABEL: floor_v4f32:
-; CHECK: vrfim
 
 declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
 define <8 x float> @floor_v8f32(<8 x float> %p)
+; CHECK-LABEL: floor_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfim 2, 2
+; CHECK-NEXT:    vrfim 3, 3
+; CHECK-NEXT:    blr
 {
   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK-LABEL: floor_v8f32:
-; CHECK: vrfim
-; CHECK: vrfim
 
 declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
 define <4 x float> @ceil_v4f32(<4 x float> %p)
+; CHECK-LABEL: ceil_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfip 2, 2
+; CHECK-NEXT:    blr
 {
   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK-LABEL: ceil_v4f32:
-; CHECK: vrfip
 
 declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
 define <8 x float> @ceil_v8f32(<8 x float> %p)
+; CHECK-LABEL: ceil_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfip 2, 2
+; CHECK-NEXT:    vrfip 3, 3
+; CHECK-NEXT:    blr
 {
   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK-LABEL: ceil_v8f32:
-; CHECK: vrfip
-; CHECK: vrfip
 
 declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
 define <4 x float> @trunc_v4f32(<4 x float> %p)
+; CHECK-LABEL: trunc_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfiz 2, 2
+; CHECK-NEXT:    blr
 {
   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK-LABEL: trunc_v4f32:
-; CHECK: vrfiz
 
 declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
 define <8 x float> @trunc_v8f32(<8 x float> %p)
+; CHECK-LABEL: trunc_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfiz 2, 2
+; CHECK-NEXT:    vrfiz 3, 3
+; CHECK-NEXT:    blr
 {
   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK-LABEL: trunc_v8f32:
-; CHECK: vrfiz
-; CHECK: vrfiz
 
 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
 define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+; CHECK-LABEL: nearbyint_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfin 2, 2
+; CHECK-NEXT:    blr
 {
   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK-LABEL: nearbyint_v4f32:
-; CHECK: vrfin
 
 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
 define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+; CHECK-LABEL: nearbyint_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrfin 2, 2
+; CHECK-NEXT:    vrfin 3, 3
+; CHECK-NEXT:    blr
 {
   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK-LABEL: nearbyint_v8f32:
-; CHECK: vrfin
-; CHECK: vrfin
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index ea08061221fd4..769823d1c4216 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -75,6 +75,7 @@
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Exception handling preparation
+; CHECK-NEXT:     RISC-V Promote Constants
 ; CHECK-NEXT:     A No-Op Barrier Pass
 ; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       Merge internal globals
diff --git a/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir
new file mode 100644
index 0000000000000..7844589e3f93c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir
@@ -0,0 +1,35 @@
+# RUN: llc %s -mtriple=riscv64 \
+# RUN: -run-pass=cfi-instr-inserter \
+# RUN: -riscv-enable-cfi-instr-inserter=true
+# XFAIL: *
+
+# Technically, it is possible that a callee-saved register is saved in multiple different locations.
+# CFIInstrInserter should handle this, but currently it does not.
+---
+name: multiple_locations
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x9, $x2
+    BEQ $x10, $x0, %bb.3
+    PseudoBR %bb.2
+
+  bb.1:
+    liveins: $x10, $x9, $x2
+    $x5 = COPY $x9
+    CFI_INSTRUCTION register $x9, $x5
+    $x9 = COPY $x5
+    CFI_INSTRUCTION register $x9, $x9
+    PseudoBR %bb.3
+
+  bb.2:
+    liveins: $x10, $x9, $x2
+    SD $x9, $x2, 0 :: (store (s64))
+    CFI_INSTRUCTION offset $x9, 0
+    $x9  = LD $x2, 0 :: (load (s64))
+    CFI_INSTRUCTION register $x9, $x9
+    PseudoBR %bb.3
+
+  bb.3:
+    PseudoRET
+...
diff --git a/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll
new file mode 100644
index 0000000000000..2bde6013b3640
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -S -riscv-promote-const -mtriple=riscv64 -mattr=+d | FileCheck %s
+
+; No promotion should take place, as the pass skips floats.
+define float @multiple_floats(float %a, float %b) {
+; CHECK-LABEL: define float @multiple_floats(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[B]], 2.000000e+00
+; CHECK-NEXT:    [[SUM_F:%.*]] = fadd float [[ADD1]], [[ADD2]]
+; CHECK-NEXT:    ret float [[SUM_F]]
+;
+entry:
+  %add1 = fadd float %a, 1.0
+  %add2 = fadd float %b, 2.0
+  %sum_f = fadd float %add1, %add2
+  ret float %sum_f
+}
+
+; No promotion should take place as cases with a single constant are skipped.
+define double @single_double(double %a) {
+; CHECK-LABEL: define double @single_double(
+; CHECK-SAME: double [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[A]], 4.210000e+01
+; CHECK-NEXT:    ret double [[ADD]]
+;
+entry:
+  %add = fadd double %a, 42.1
+  ret double %add
+}
+
+; Promotion should happen as we have at least two unique constants that would
+; otherwise go in the constant pool.
+define double @multiple_doubles(double %a, double %b) {
+; CHECK-LABEL: define double @multiple_doubles(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[ADD3:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles, align 8
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd double [[A]], [[ADD3]]
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd double [[B]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]]
+; CHECK-NEXT:    [[SUM1:%.*]] = fadd double [[ADD4]], [[SUM]]
+; CHECK-NEXT:    ret double [[SUM1]]
+;
+entry:
+  %add1 = fadd double %a, 2.718
+  %add2 = fadd double %b, 42.1
+  %add3 = fadd double %add1, 2.718
+  %sum = fadd double %add2, %add3
+  ret double %sum
+}
+
+; Promotion should not happen as the constants will be materialised rather
+; than using the constant pool.
+define double @multiple_doubles_no_promote(double %a, double %b) {
+; CHECK-LABEL: define double @multiple_doubles_no_promote(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[A]], 1.000000e+00
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd double [[B]], 2.000000e+00
+; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[ADD1]], 1.000000e+00
+; CHECK-NEXT:    [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]]
+; CHECK-NEXT:    ret double [[SUM]]
+;
+entry:
+  %add1 = fadd double %a, 1.0
+  %add2 = fadd double %b, 2.0
+  %add3 = fadd double %add1, 1.0
+  %sum = fadd double %add2, %add3
+  ret double %sum
+}
+
+; The same constant shouldn't be loaded more than once per BB.
+define double @multiple_doubles_multi_bb(double %a, i1 %cond) {
+; CHECK-LABEL: define double @multiple_doubles_multi_bb(
+; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]]
+; CHECK:       [[IF_TRUE]]:
+; CHECK-NEXT:    [[DOUBLE_VAL2:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8
+; CHECK-NEXT:    [[ADD_T:%.*]] = fadd double [[A]], [[DOUBLE_VAL]]
+; CHECK-NEXT:    [[MUL_T:%.*]] = fmul double [[ADD_T]], [[DOUBLE_VAL2]]
+; CHECK-NEXT:    [[SUB_T:%.*]] = fsub double [[MUL_T]], [[DOUBLE_VAL]]
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_FALSE]]:
+; CHECK-NEXT:    [[DOUBLE_VAL3:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[DOUBLE_VAL1:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8
+; CHECK-NEXT:    [[ADD_F:%.*]] = fadd double [[A]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    [[MUL_F:%.*]] = fmul double [[ADD_F]], [[DOUBLE_VAL3]]
+; CHECK-NEXT:    [[SUB_F:%.*]] = fsub double [[MUL_F]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[PHI_RES:%.*]] = phi double [ [[SUB_T]], %[[IF_TRUE]] ], [ [[SUB_F]], %[[IF_FALSE]] ]
+; CHECK-NEXT:    ret double [[PHI_RES]]
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  %add.t = fadd double %a, 1.23
+  %mul.t = fmul double %add.t, 4.56
+  %sub.t = fsub double %mul.t, 1.23
+  br label %if.end
+
+if.false:
+  %add.f = fadd double %a, 1.23
+  %mul.f = fmul double %add.f, 4.56
+  %sub.f = fsub double %mul.f, 1.23
+  br label %if.end
+
+if.end:
+  %phi.res = phi double [ %sub.t, %if.true ], [ %sub.f, %if.false ]
+  ret double %phi.res
+}
+
+; Check the insertion point in the case we have a phi taking a constant C and
+; the source block also uses that same constant.
+define double @multiple_doubles_phi(double %a, i1 %cond) {
+; CHECK-LABEL: define double @multiple_doubles_phi(
+; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_phi, align 8
+; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A]], [[DOUBLE_VAL]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[PHI_VAL:%.*]] = phi double [ [[DOUBLE_VAL]], %[[IF_THEN]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_phi, i64 0, i64 1), align 8
+; CHECK-NEXT:    [[RES:%.*]] = fadd double [[PHI_VAL]], [[DOUBLE_VAL1]]
+; CHECK-NEXT:    ret double [[RES]]
+;
+entry:
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  %mul = fmul double %a, 1.23
+  br label %if.end
+
+if.end:
+  %phi.val = phi double [ 1.23, %if.then ], [ %a, %entry ]
+  %res = fadd double %phi.val, 4.56
+  ret double %res
+}
diff --git a/llvm/test/CodeGen/X86/pr166534.ll b/llvm/test/CodeGen/X86/pr166534.ll
new file mode 100644
index 0000000000000..aef44cc3e40d0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr166534.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64    | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE4
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
+
+define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
+; SSE2-LABEL: pr166534:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movq (%rdi), %rax
+; SSE2-NEXT:    movq 8(%rdi), %r8
+; SSE2-NEXT:    movdqu (%rdi), %xmm0
+; SSE2-NEXT:    movq (%rsi), %r9
+; SSE2-NEXT:    movq 8(%rsi), %rdi
+; SSE2-NEXT:    movdqu (%rsi), %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %esi
+; SSE2-NEXT:    xorl %r10d, %r10d
+; SSE2-NEXT:    cmpl $65535, %esi # imm = 0xFFFF
+; SSE2-NEXT:    sete %r10b
+; SSE2-NEXT:    orq %r10, (%rdx)
+; SSE2-NEXT:    cmpl $65535, %esi # imm = 0xFFFF
+; SSE2-NEXT:    jne .LBB0_2
+; SSE2-NEXT:  # %bb.1: # %if.then
+; SSE2-NEXT:    xorq %r9, %rax
+; SSE2-NEXT:    xorq %rdi, %r8
+; SSE2-NEXT:    xorl %edx, %edx
+; SSE2-NEXT:    orq %rax, %r8
+; SSE2-NEXT:    sete %dl
+; SSE2-NEXT:    orq %rdx, (%rcx)
+; SSE2-NEXT:  .LBB0_2: # %if.end
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: pr166534:
+; SSE4:       # %bb.0: # %entry
+; SSE4-NEXT:    movq (%rdi), %rax
+; SSE4-NEXT:    movq 8(%rdi), %r8
+; SSE4-NEXT:    movdqu (%rdi), %xmm0
+; SSE4-NEXT:    movq (%rsi), %r9
+; SSE4-NEXT:    movq 8(%rsi), %rdi
+; SSE4-NEXT:    movdqu (%rsi), %xmm1
+; SSE4-NEXT:    pxor %xmm0, %xmm1
+; SSE4-NEXT:    xorl %esi, %esi
+; SSE4-NEXT:    ptest %xmm1, %xmm1
+; SSE4-NEXT:    sete %sil
+; SSE4-NEXT:    orq %rsi, (%rdx)
+; SSE4-NEXT:    ptest %xmm1, %xmm1
+; SSE4-NEXT:    jne .LBB0_2
+; SSE4-NEXT:  # %bb.1: # %if.then
+; SSE4-NEXT:    xorq %r9, %rax
+; SSE4-NEXT:    xorq %rdi, %r8
+; SSE4-NEXT:    xorl %edx, %edx
+; SSE4-NEXT:    orq %rax, %r8
+; SSE4-NEXT:    sete %dl
+; SSE4-NEXT:    orq %rdx, (%rcx)
+; SSE4-NEXT:  .LBB0_2: # %if.end
+; SSE4-NEXT:    retq
+;
+; AVX2-LABEL: pr166534:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    movq (%rdi), %rax
+; AVX2-NEXT:    movq 8(%rdi), %r8
+; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
+; AVX2-NEXT:    movq (%rsi), %rdi
+; AVX2-NEXT:    vpxor (%rsi), %xmm0, %xmm0
+; AVX2-NEXT:    movq 8(%rsi), %rsi
+; AVX2-NEXT:    xorl %r9d, %r9d
+; AVX2-NEXT:    vptest %xmm0, %xmm0
+; AVX2-NEXT:    sete %r9b
+; AVX2-NEXT:    orq %r9, (%rdx)
+; AVX2-NEXT:    vptest %xmm0, %xmm0
+; AVX2-NEXT:    jne .LBB0_2
+; AVX2-NEXT:  # %bb.1: # %if.then
+; AVX2-NEXT:    xorq %rdi, %rax
+; AVX2-NEXT:    xorq %rsi, %r8
+; AVX2-NEXT:    xorl %edx, %edx
+; AVX2-NEXT:    orq %rax, %r8
+; AVX2-NEXT:    sete %dl
+; AVX2-NEXT:    orq %rdx, (%rcx)
+; AVX2-NEXT:  .LBB0_2: # %if.end
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: pr166534:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    movq (%rdi), %rax
+; AVX512-NEXT:    movq 8(%rdi), %r8
+; AVX512-NEXT:    vmovdqu (%rdi), %xmm0
+; AVX512-NEXT:    movq (%rsi), %r9
+; AVX512-NEXT:    movq 8(%rsi), %rdi
+; AVX512-NEXT:    vpxor (%rsi), %xmm0, %xmm0
+; AVX512-NEXT:    xorl %esi, %esi
+; AVX512-NEXT:    vptest %xmm0, %xmm0
+; AVX512-NEXT:    sete %sil
+; AVX512-NEXT:    orq %rsi, (%rdx)
+; AVX512-NEXT:    vptest %xmm0, %xmm0
+; AVX512-NEXT:    jne .LBB0_2
+; AVX512-NEXT:  # %bb.1: # %if.then
+; AVX512-NEXT:    xorq %r9, %rax
+; AVX512-NEXT:    xorq %rdi, %r8
+; AVX512-NEXT:    xorl %edx, %edx
+; AVX512-NEXT:    orq %rax, %r8
+; AVX512-NEXT:    sete %dl
+; AVX512-NEXT:    orq %rdx, (%rcx)
+; AVX512-NEXT:  .LBB0_2: # %if.end
+; AVX512-NEXT:    retq
+entry:
+  %a = load i128, ptr %pa, align 8
+  %b = load i128, ptr %pb, align 8
+  %cmp = icmp eq i128 %a, %b
+  %conv1 = zext i1 %cmp to i128
+  %c = load i128, ptr %pc, align 8
+  %or = or i128 %c, %conv1
+  store i128 %or, ptr %pc, align 8
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %d = load i128, ptr %pd, align 8
+  %or7 = or i128 %d, %conv1
+  store i128 %or7, ptr %pd, align 8
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/llvm/test/DebugInfo/Hexagon/lit.local.cfg b/llvm/test/DebugInfo/Hexagon/lit.local.cfg
new file mode 100644
index 0000000000000..3bed54b1a88d2
--- /dev/null
+++ b/llvm/test/DebugInfo/Hexagon/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "Hexagon" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/DebugInfo/Hexagon/packet-debug.mir b/llvm/test/DebugInfo/Hexagon/packet-debug.mir
new file mode 100644
index 0000000000000..485b543b6e176
--- /dev/null
+++ b/llvm/test/DebugInfo/Hexagon/packet-debug.mir
@@ -0,0 +1,48 @@
+# RUN: llc -mtriple=hexagon -run-pass hexagon-packetizer %s -o - | FileCheck %s
+
+# CHECK-LABEL: name: factorial
+
+# The first bundle in bb.0 should have debug-location !19 (line 9),
+# not !18 (line 0) from the DBG_VALUE instructions.
+# CHECK: bb.0:
+# CHECK: BUNDLE {{.*}}line: 9
+
+--- |
+  define void @factorial() { ret void }
+
+  !llvm.dbg.cu = !{!2}
+  !llvm.module.flags = !{!6, !7}
+
+  !2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "test", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+  !3 = !DIFile(filename: "fact.c", directory: "/test")
+  !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+  !6 = !{i32 2, !"Debug Info Version", i32 3}
+  !7 = !{i32 1, !"wchar_size", i32 4}
+  !12 = distinct !DISubprogram(name: "factorial", scope: !3, file: !3, line: 6, type: !13, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+  !13 = !DISubroutineType(types: !14)
+  !14 = !{!5, !5}
+  !16 = !DILocalVariable(name: "i", arg: 1, scope: !12, file: !3, line: 6, type: !5)
+  !18 = !DILocation(line: 0, scope: !12)
+  !19 = !DILocation(line: 9, column: 9, scope: !12)
+  !21 = !DILocation(line: 9, column: 7, scope: !12)
+
+...
+---
+name:            factorial
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $r0
+
+    DBG_VALUE $r0, $noreg, !16, !DIExpression(), debug-location !18
+    $r2 = A2_tfr $r0
+    DBG_VALUE $r2, $noreg, !16, !DIExpression(), debug-location !18
+    renamable $p0 = C2_cmpeqi killed $r0, 1, debug-location !19
+    renamable $r0 = A2_tfrsi 1
+    J2_jumpt killed $p0, %bb.1, implicit-def $pc, debug-location !21
+
+  bb.1:
+    PS_jmpret $r31, implicit-def dead $pc
+
+...
diff --git a/llvm/test/Demangle/ms-operators.test b/llvm/test/Demangle/ms-operators.test
index b940488786631..cafa1ae3c0663 100644
--- a/llvm/test/Demangle/ms-operators.test
+++ b/llvm/test/Demangle/ms-operators.test
@@ -143,9 +143,24 @@
 ??_7A@B@@6BC@D@@@
 ; CHECK: const B::A::`vftable'{for `D::C'}
 
+??_7A@B@@6BC@D@@E@F@@@
+; CHECK: const B::A::`vftable'{for `D::C's `F::E'}
+
+??_7A@B@@6BC@D@@E@F@@G@H@@@
+; CHECK: const B::A::`vftable'{for `D::C's `F::E's `H::G'}
+
 ??_8Middle2@@7B@
 ; CHECK: const Middle2::`vbtable'
 
+??_7A@@6BB@@@
+; CHECK: const A::`vftable'{for `B'}
+
+??_7A@@6BB@@C@@@
+; CHECK: const A::`vftable'{for `B's `C'}
+
+??_7A@@6BB@@C@@D@@@
+; CHECK: const A::`vftable'{for `B's `C's `D'}
+
 ??_9Base@@$B7AA
 ; CHECK: [thunk]: __cdecl Base::`vcall'{8, {flat}}
 
diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td
index 8648651f3d714..5bd7890e0ddd1 100644
--- a/llvm/test/TableGen/directive1.td
+++ b/llvm/test/TableGen/directive1.td
@@ -186,8 +186,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
 // IMPL:       #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS
 // IMPL-NEXT:  #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS
 // IMPL-EMPTY:
-// IMPL-NEXT:  namespace llvm {
-// IMPL-NEXT:  namespace tdl {
+// IMPL-NEXT:  namespace llvm::tdl {
 // IMPL-EMPTY:
 // IMPL-NEXT:    // Sets for dira
 // IMPL-EMPTY:
@@ -204,8 +203,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
 // IMPL-EMPTY:
 // IMPL-NEXT:    static  requiredClauses_TDLD_dira {
 // IMPL-NEXT:    };
-// IMPL-NEXT:  } // namespace tdl
-// IMPL-NEXT:  } // namespace llvm
+// IMPL-EMPTY:
+// IMPL-NEXT:  } // namespace llvm::tdl
 // IMPL-EMPTY:
 // IMPL-NEXT:  #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS
 // IMPL-EMPTY:
diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td
index 96022d7647440..eaaf82ddaaf41 100644
--- a/llvm/test/TableGen/directive2.td
+++ b/llvm/test/TableGen/directive2.td
@@ -159,8 +159,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
 // IMPL:      #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS
 // IMPL-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS
 // IMPL-EMPTY:
-// IMPL-NEXT: namespace llvm {
-// IMPL-NEXT: namespace tdl {
+// IMPL-NEXT: namespace llvm::tdl {
 // IMPL-EMPTY:
 // IMPL-NEXT:   // Sets for dira
 // IMPL-EMPTY:
@@ -177,8 +176,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
 // IMPL-EMPTY:
 // IMPL-NEXT:   static  requiredClauses_TDLD_dira {
 // IMPL-NEXT:   };
-// IMPL-NEXT: } // namespace tdl
-// IMPL-NEXT: } // namespace llvm
+// IMPL-EMPTY:
+// IMPL-NEXT: } // namespace llvm::tdl
 // IMPL-EMPTY:
 // IMPL-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS
 // IMPL-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index a852b731ea13b..9e523be618b44 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -12,12 +12,15 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
+; CHECK-NEXT:    [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
 ; CHECK-NEXT:    store ptr [[TMP4]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
@@ -26,7 +29,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK:       [[PRED_STORE_IF1]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
 ; CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP7]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK:       [[PRED_STORE_CONTINUE2]]:
@@ -35,7 +38,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK:       [[PRED_STORE_IF3]]:
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
 ; CHECK-NEXT:    store ptr [[TMP12]], ptr [[TMP11]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
 ; CHECK:       [[PRED_STORE_CONTINUE4]]:
@@ -44,7 +47,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK:       [[PRED_STORE_IF5]]:
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
 ; CHECK-NEXT:    store ptr [[TMP16]], ptr [[TMP15]], align 8
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index d33bf45595e2e..0bb743dc8a7f5 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -359,7 +359,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
     OS << "  static constexpr bool is_iterable = true;\n";
     OS << "};\n";
   }
-  LlvmNS.close();
 }
 
 // Given a list of spellings (for a given clause/directive), order them
@@ -931,27 +930,20 @@ static void generateClauseSet(ArrayRef<const Record *> VerClauses,
 // Generate an enum set for the 4 kinds of clauses linked to a directive.
 static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
                                         Frontend FE, raw_ostream &OS) {
+  IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() +
+                             "_DIRECTIVE_CLAUSE_SETS");
 
-  std::string IfDefName{"GEN_"};
-  IfDefName += getFESpelling(FE).upper();
-  IfDefName += "_DIRECTIVE_CLAUSE_SETS";
-  IfDefEmitter Scope(OS, IfDefName);
-
-  StringRef Namespace =
-      getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE);
+  std::string Namespace =
+      getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE).str();
   // The namespace has to be different for clang vs flang, as 2 structs with the
   // same name but different layout is UB.  So just put the 'clang' on in the
   // clang namespace.
-  OS << "namespace " << Namespace << " {\n";
-
-  // Open namespaces defined in the directive language.
-  SmallVector<StringRef, 2> Namespaces;
-  SplitString(DirLang.getCppNamespace(), Namespaces, "::");
-  for (auto Ns : Namespaces)
-    OS << "namespace " << Ns << " {\n";
+  // Additionally, open namespaces defined in the directive language.
+  if (!DirLang.getCppNamespace().empty())
+    Namespace += "::" + DirLang.getCppNamespace().str();
+  NamespaceEmitter NS(OS, Namespace);
 
   for (const Directive Dir : DirLang.getDirectives()) {
-    OS << "\n";
     OS << "// Sets for " << Dir.getSpellingForIdentifier() << "\n";
 
     generateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir,
@@ -963,12 +955,6 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
     generateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir,
                       DirLang, FE);
   }
-
-  // Closing namespaces
-  for (auto Ns : reverse(Namespaces))
-    OS << "} // namespace " << Ns << "\n";
-
-  OS << "} // namespace " << Namespace << "\n";
 }
 
 // Generate a map of directive (key) with DirectiveClauses struct as values.
@@ -976,10 +962,8 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
 // allowances (allowed, allowed once, allowed exclusive and required).
 static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang,
                                        Frontend FE, raw_ostream &OS) {
-  std::string IfDefName{"GEN_"};
-  IfDefName += getFESpelling(FE).upper();
-  IfDefName += "_DIRECTIVE_CLAUSE_MAP";
-  IfDefEmitter Scope(OS, IfDefName);
+  IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() +
+                             "_DIRECTIVE_CLAUSE_MAP");
 
   OS << "{\n";
 
diff --git a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn
index 393309ee39bfe..a261f2866be47 100644
--- a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn
+++ b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn
@@ -12,7 +12,7 @@ static_library("Passes") {
     "//llvm/utils/gn/build/libs/pthread",
   ]
   sources = [
-    "ADRRelaxationPass.cpp",
+    "AArch64RelaxationPass.cpp",
     "Aligner.cpp",
     "AllocCombiner.cpp",
     "AsmDump.cpp",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
index 2f84999621e1b..3c3fdf7e16885 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
@@ -82,6 +82,7 @@ static_library("bugprone") {
     "SmartPtrArrayMismatchCheck.cpp",
     "SpuriouslyWakeUpFunctionsCheck.cpp",
     "StandaloneEmptyCheck.cpp",
+    "StdNamespaceModificationCheck.cpp",
     "StringConstructorCheck.cpp",
     "StringIntegerAssignmentCheck.cpp",
     "StringLiteralWithEmbeddedNulCheck.cpp",
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn
index ec642b6afad66..1eae289143b5b 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn
@@ -16,7 +16,6 @@ static_library("cert") {
   ]
   sources = [
     "CERTTidyModule.cpp",
-    "DontModifyStdNamespaceCheck.cpp",
     "FloatLoopCounter.cpp",
     "LimitedRandomnessCheck.cpp",
     "MutatingCopyCheck.cpp",
diff --git a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn
index 065fc6cdd74a3..bd8d9610c2a4a 100644
--- a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn
@@ -17,6 +17,7 @@ unittest("ClangCodeGenTests") {
     "BufferSourceTest.cpp",
     "CheckTargetFeaturesTest.cpp",
     "CodeGenExternalTest.cpp",
+    "DemangleTrapReasonInDebugInfo.cpp",
     "TBAAMetadataTest.cpp",
   ]
 }
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn
index 9848efef70568..fa99fa8649caf 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn
@@ -1,10 +1,16 @@
 static_library("CPlusPlus") {
   output_name = "lldbPluginCPPRuntime"
-  configs += [ "//llvm/utils/gn/build:lldb_code" ]
+  configs += [
+    "//llvm/utils/gn/build:clang_code",
+    "//llvm/utils/gn/build:lldb_code",
+  ]
   deps = [
     "//lldb/source/Core",
     "//lldb/source/Symbol",
     "//lldb/source/Target",
   ]
-  sources = [ "CPPLanguageRuntime.cpp" ]
+  sources = [
+    "CPPLanguageRuntime.cpp",
+    "VerboseTrapFrameRecognizer.cpp",
+  ]
 }
diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
index a863baf912051..783eb96283596 100644
--- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
@@ -105,6 +105,5 @@ static_library("Target") {
     "UnixSignals.cpp",
     "UnwindAssembly.cpp",
     "UnwindLLDB.cpp",
-    "VerboseTrapFrameRecognizer.cpp",
   ]
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn
index 1a890f6733597..a234d2be67f66 100644
--- a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn
@@ -12,7 +12,6 @@ static_library("BinaryFormat") {
     "ELF.cpp",
     "MachO.cpp",
     "Magic.cpp",
-    "Minidump.cpp",
     "MsgPackDocument.cpp",
     "MsgPackDocumentYAML.cpp",
     "MsgPackReader.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
index a1f5b475e2096..ad72c0069237d 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
@@ -151,6 +151,7 @@ static_library("LLVMRISCVCodeGen") {
     "RISCVMoveMerger.cpp",
     "RISCVOptWInstrs.cpp",
     "RISCVPostRAExpandPseudoInsts.cpp",
+    "RISCVPromoteConstant.cpp",
     "RISCVPushPopOptimizer.cpp",
     "RISCVRedundantCopyElimination.cpp",
     "RISCVRegisterInfo.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn
index 2ece91331c5d8..11a57fcb008cd 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn
@@ -12,10 +12,17 @@ tablegen("WebAssemblyGenFastISel") {
   td_file = "WebAssembly.td"
 }
 
+tablegen("WebAssemblyGenSDNodeInfo") {
+  visibility = [ ":LLVMWebAssemblyCodeGen" ]
+  args = [ "-gen-sd-node-info" ]
+  td_file = "WebAssembly.td"
+}
+
 static_library("LLVMWebAssemblyCodeGen") {
   deps = [
     ":WebAssemblyGenDAGISel",
     ":WebAssemblyGenFastISel",
+    ":WebAssemblyGenSDNodeInfo",
     "MCTargetDesc",
     "TargetInfo",
     "//llvm/include/llvm/Config:llvm-config",
diff --git a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
index 964281592cc65..cad6cec761ab8 100644
--- a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
+++ b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h
@@ -92,12 +92,43 @@ class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern<SourceOp> {
   using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
   using Super = VectorConvertToLLVMPattern<SourceOp, TargetOp>;
 
+  /// Return the given type if it's a floating point type. If the given type is
+  /// a vector type, return its element type if it's a floating point type.
+  static FloatType getFloatingPointType(Type type) {
+    if (auto floatType = dyn_cast<FloatType>(type))
+      return floatType;
+    if (auto vecType = dyn_cast<VectorType>(type))
+      return dyn_cast<FloatType>(vecType.getElementType());
+    return nullptr;
+  }
+
   LogicalResult
   matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     static_assert(
         std::is_base_of<OpTrait::OneResult<SourceOp>, SourceOp>::value,
         "expected single result op");
+
+    // The pattern should not apply if a floating-point operand is converted to
+    // a non-floating-point type. This indicates that the floating point type
+    // is not supported by the LLVM lowering. (Such types are converted to
+    // integers.)
+    auto checkType = [&](Value v) -> LogicalResult {
+      FloatType floatType = getFloatingPointType(v.getType());
+      if (!floatType)
+        return success();
+      Type convertedType = this->getTypeConverter()->convertType(floatType);
+      if (!isa_and_nonnull<FloatType>(convertedType))
+        return rewriter.notifyMatchFailure(op,
+                                           "unsupported floating point type");
+      return success();
+    };
+    for (Value operand : op->getOperands())
+      if (failed(checkType(operand)))
+        return failure();
+    if (failed(checkType(op->getResult(0))))
+      return failure();
+
     // Determine attributes for the target op
     AttrConvert<SourceOp, TargetOp> attrConvert(op);
 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 10f0cc254ea97..80bc0e5986e51 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -949,7 +949,7 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">,
   }];
 
   string llvmBuilder = [{
-    auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
+    auto [id, args] = NVVM::MBarrierTestWaitOp::getIntrinsicIDAndArgs(
                       *op, moduleTranslation, builder);
     $res = createIntrinsicCall(builder, id, args);
   }];
diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h
index ed7e2a08ebfd9..5ac9e26e8636d 100644
--- a/mlir/include/mlir/Transforms/DialectConversion.h
+++ b/mlir/include/mlir/Transforms/DialectConversion.h
@@ -981,6 +981,28 @@ class ConversionPatternRewriter final : public PatternRewriter {
   /// Return a reference to the internal implementation.
   detail::ConversionPatternRewriterImpl &getImpl();
 
+  /// Attempt to legalize the given operation. This can be used within
+  /// conversion patterns to change the default pre-order legalization order.
+  /// Returns "success" if the operation was legalized, "failure" otherwise.
+  ///
+  /// Note: In a partial conversion, this function returns "success" even if
+  /// the operation could not be legalized, as long as it was not explicitly
+  /// marked as illegal in the conversion target.
+  LogicalResult legalize(Operation *op);
+
+  /// Attempt to legalize the given region. This can be used within
+  /// conversion patterns to change the default pre-order legalization order.
+  /// Returns "success" if the region was legalized, "failure" otherwise.
+  ///
+  /// If the current pattern runs with a type converter, the entry block
+  /// signature will be converted before legalizing the operations in the
+  /// region.
+  ///
+  /// Note: In a partial conversion, this function returns "success" even if
+  /// an operation could not be legalized, as long as it was not explicitly
+  /// marked as illegal in the conversion target.
+  LogicalResult legalize(Region *r);
+
 private:
   // Allow OperationConverter to construct new rewriters.
   friend struct OperationConverter;
@@ -989,7 +1011,8 @@ class ConversionPatternRewriter final : public PatternRewriter {
   /// conversions. They apply some IR rewrites in a delayed fashion and could
   /// bring the IR into an inconsistent state when used standalone.
   explicit ConversionPatternRewriter(MLIRContext *ctx,
-                                     const ConversionConfig &config);
+                                     const ConversionConfig &config,
+                                     OperationConverter &converter);
 
   // Hide unsupported pattern rewriter API.
   using OpBuilder::setListener;
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index 2fe06970eb568..f8c38fadbd229 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -92,6 +92,22 @@ static OpBuilder::InsertPoint computeInsertPoint(ArrayRef<Value> vals) {
   return pt;
 }
 
+namespace {
+enum OpConversionMode {
+  /// In this mode, the conversion will ignore failed conversions to allow
+  /// illegal operations to co-exist in the IR.
+  Partial,
+
+  /// In this mode, all operations must be legal for the given target for the
+  /// conversion to succeed.
+  Full,
+
+  /// In this mode, operations are analyzed for legality. No actual rewrites are
+  /// applied to the operations on success.
+  Analysis,
+};
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // ConversionValueMapping
 //===----------------------------------------------------------------------===//
@@ -866,8 +882,9 @@ namespace mlir {
 namespace detail {
 struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
   explicit ConversionPatternRewriterImpl(ConversionPatternRewriter &rewriter,
-                                         const ConversionConfig &config)
-      : rewriter(rewriter), config(config),
+                                         const ConversionConfig &config,
+                                         OperationConverter &opConverter)
+      : rewriter(rewriter), config(config), opConverter(opConverter),
         notifyingRewriter(rewriter.getContext(), config.listener) {}
 
   //===--------------------------------------------------------------------===//
@@ -1124,6 +1141,9 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
   /// Dialect conversion configuration.
   const ConversionConfig &config;
 
+  /// The operation converter to use for recursive legalization.
+  OperationConverter &opConverter;
+
   /// A set of erased operations. This set is utilized only if
   /// `allowPatternRollback` is set to "false". Conceptually, this set is
   /// similar to `replacedOps` (which is maintained when the flag is set to
@@ -2084,9 +2104,10 @@ void ConversionPatternRewriterImpl::notifyMatchFailure(
 //===----------------------------------------------------------------------===//
 
 ConversionPatternRewriter::ConversionPatternRewriter(
-    MLIRContext *ctx, const ConversionConfig &config)
-    : PatternRewriter(ctx),
-      impl(new detail::ConversionPatternRewriterImpl(*this, config)) {
+    MLIRContext *ctx, const ConversionConfig &config,
+    OperationConverter &opConverter)
+    : PatternRewriter(ctx), impl(new detail::ConversionPatternRewriterImpl(
+                                *this, config, opConverter)) {
   setListener(impl.get());
 }
 
@@ -2207,6 +2228,37 @@ ConversionPatternRewriter::getRemappedValues(ValueRange keys,
   return success();
 }
 
+LogicalResult ConversionPatternRewriter::legalize(Region *r) {
+  // Fast path: If the region is empty, there is nothing to legalize.
+  if (r->empty())
+    return success();
+
+  // Gather a list of all operations to legalize. This is done before
+  // converting the entry block signature because unrealized_conversion_cast
+  // ops should not be included.
+  SmallVector<Operation *> ops;
+  for (Block &b : *r)
+    for (Operation &op : b)
+      ops.push_back(&op);
+
+  // If the current pattern runs with a type converter, convert the entry block
+  // signature.
+  if (const TypeConverter *converter = impl->currentTypeConverter) {
+    std::optional<TypeConverter::SignatureConversion> conversion =
+        converter->convertBlockSignature(&r->front());
+    if (!conversion)
+      return failure();
+    applySignatureConversion(&r->front(), *conversion, converter);
+  }
+
+  // Legalize all operations in the region.
+  for (Operation *op : ops)
+    if (failed(legalize(op)))
+      return failure();
+
+  return success();
+}
+
 void ConversionPatternRewriter::inlineBlockBefore(Block *source, Block *dest,
                                                   Block::iterator before,
                                                   ValueRange argValues) {
@@ -3192,22 +3244,6 @@ static void reconcileUnrealizedCasts(
 // OperationConverter
 //===----------------------------------------------------------------------===//
 
-namespace {
-enum OpConversionMode {
-  /// In this mode, the conversion will ignore failed conversions to allow
-  /// illegal operations to co-exist in the IR.
-  Partial,
-
-  /// In this mode, all operations must be legal for the given target for the
-  /// conversion to succeed.
-  Full,
-
-  /// In this mode, operations are analyzed for legality. No actual rewrites are
-  /// applied to the operations on success.
-  Analysis,
-};
-} // namespace
-
 namespace mlir {
 // This class converts operations to a given conversion target via a set of
 // rewrite patterns. The conversion behaves differently depending on the
@@ -3217,16 +3253,20 @@ struct OperationConverter {
                               const FrozenRewritePatternSet &patterns,
                               const ConversionConfig &config,
                               OpConversionMode mode)
-      : rewriter(ctx, config), opLegalizer(rewriter, target, patterns),
+      : rewriter(ctx, config, *this), opLegalizer(rewriter, target, patterns),
         mode(mode) {}
 
   /// Converts the given operations to the conversion target.
   LogicalResult convertOperations(ArrayRef<Operation *> ops);
 
-private:
-  /// Converts an operation with the given rewriter.
-  LogicalResult convert(Operation *op);
+  /// Converts a single operation. If `isRecursiveLegalization` is "true", the
+  /// conversion is a recursive legalization request, triggered from within a
+  /// pattern. In that case, do not emit errors because there will be another
+  /// attempt at legalizing the operation later (via the regular pre-order
+  /// legalization mechanism).
+  LogicalResult convert(Operation *op, bool isRecursiveLegalization = false);
 
+private:
   /// The rewriter to use when converting operations.
   ConversionPatternRewriter rewriter;
 
@@ -3238,32 +3278,42 @@ struct OperationConverter {
 };
 } // namespace mlir
 
-LogicalResult OperationConverter::convert(Operation *op) {
+LogicalResult ConversionPatternRewriter::legalize(Operation *op) {
+  return impl->opConverter.convert(op, /*isRecursiveLegalization=*/true);
+}
+
+LogicalResult OperationConverter::convert(Operation *op,
+                                          bool isRecursiveLegalization) {
   const ConversionConfig &config = rewriter.getConfig();
 
   // Legalize the given operation.
   if (failed(opLegalizer.legalize(op))) {
     // Handle the case of a failed conversion for each of the different modes.
     // Full conversions expect all operations to be converted.
-    if (mode == OpConversionMode::Full)
-      return op->emitError()
-             << "failed to legalize operation '" << op->getName() << "'";
+    if (mode == OpConversionMode::Full) {
+      if (!isRecursiveLegalization)
+        op->emitError() << "failed to legalize operation '" << op->getName()
+                        << "'";
+      return failure();
+    }
     // Partial conversions allow conversions to fail iff the operation was not
     // explicitly marked as illegal. If the user provided a `unlegalizedOps`
     // set, non-legalizable ops are added to that set.
     if (mode == OpConversionMode::Partial) {
-      if (opLegalizer.isIllegal(op))
-        return op->emitError()
-               << "failed to legalize operation '" << op->getName()
-               << "' that was explicitly marked illegal";
-      if (config.unlegalizedOps)
+      if (opLegalizer.isIllegal(op)) {
+        if (!isRecursiveLegalization)
+          op->emitError() << "failed to legalize operation '" << op->getName()
+                          << "' that was explicitly marked illegal";
+        return failure();
+      }
+      if (config.unlegalizedOps && !isRecursiveLegalization)
         config.unlegalizedOps->insert(op);
     }
   } else if (mode == OpConversionMode::Analysis) {
     // Analysis conversions don't fail if any operations fail to legalize,
     // they are only interested in the operations that were successfully
     // legalized.
-    if (config.legalizableOps)
+    if (config.legalizableOps && !isRecursiveLegalization)
       config.legalizableOps->insert(op);
   }
   return success();
diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
index ba12ff29ebef9..b5dcb01d3dc6b 100644
--- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
+++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir
@@ -747,3 +747,29 @@ func.func @memref_bitcast(%1: memref<?xi16>) -> memref<?xbf16> {
   %2 = arith.bitcast %1 : memref<?xi16> to memref<?xbf16>
   func.return %2 : memref<?xbf16>
 }
+
+// -----
+
+// CHECK-LABEL: func @unsupported_fp_type
+//       CHECK:   arith.addf {{.*}} : f4E2M1FN
+//       CHECK:   arith.addf {{.*}} : vector<4xf4E2M1FN>
+//       CHECK:   arith.addf {{.*}} : vector<8x4xf4E2M1FN>
+func.func @unsupported_fp_type(%arg0: f4E2M1FN, %arg1: vector<4xf4E2M1FN>, %arg2: vector<8x4xf4E2M1FN>) -> (f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN>) {
+  %0 = arith.addf %arg0, %arg0 : f4E2M1FN
+  %1 = arith.addf %arg1, %arg1 : vector<4xf4E2M1FN>
+  %2 = arith.addf %arg2, %arg2 : vector<8x4xf4E2M1FN>
+  return %0, %1, %2 : f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN>
+}
+
+// -----
+
+//   CHECK-LABEL: func @supported_fp_type
+//         CHECK:   llvm.fadd {{.*}} : f32
+//         CHECK:   llvm.fadd {{.*}} : vector<4xf32>
+// CHECK-COUNT-4:   llvm.fadd {{.*}} : vector<8xf32>
+func.func @supported_fp_type(%arg0: f32, %arg1: vector<4xf32>, %arg2: vector<4x8xf32>) -> (f32, vector<4xf32>, vector<4x8xf32>) {
+  %0 = arith.addf %arg0, %arg0 : f32
+  %1 = arith.addf %arg1, %arg1 : vector<4xf32>
+  %2 = arith.addf %arg2, %arg2 : vector<4x8xf32>
+  return %0, %1, %2 : f32, vector<4xf32>, vector<4x8xf32>
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir
new file mode 100644
index 0000000000000..9bb3b082777fd
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir
@@ -0,0 +1,116 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) {
+  // CHECK-LABEL: define void @cp_async_mbarrier_arrive(ptr addrspace(3) %0, ptr %1) {
+  // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %1)
+  // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %1)
+  // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %0)
+  // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %0)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr
+  nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr
+  nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3>
+  nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3>
+  llvm.return
+}
+
+llvm.func @mbarrier_init_generic(%barrier: !llvm.ptr) {
+  // CHECK-LABEL: define void @mbarrier_init_generic(ptr %0) {
+  // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init(ptr %0, i32 %2)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  %count = nvvm.read.ptx.sreg.ntid.x : i32
+  nvvm.mbarrier.init %barrier, %count : !llvm.ptr, i32
+  llvm.return
+}
+
+llvm.func @mbarrier_init_shared(%barrier: !llvm.ptr<3>) {
+  // CHECK-LABEL: define void @mbarrier_init_shared(ptr addrspace(3) %0) {
+  // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %0, i32 %2)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  %count = nvvm.read.ptx.sreg.ntid.x : i32
+  nvvm.mbarrier.init %barrier, %count : !llvm.ptr<3>, i32
+  llvm.return
+}
+
+llvm.func @mbarrier_inval_generic(%barrier: !llvm.ptr) {
+  // CHECK-LABEL: define void @mbarrier_inval_generic(ptr %0) {
+  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval(ptr %0)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  nvvm.mbarrier.inval %barrier : !llvm.ptr
+  llvm.return
+}
+
+llvm.func @mbarrier_inval_shared(%barrier: !llvm.ptr<3>) {
+  // CHECK-LABEL: define void @mbarrier_inval_shared(ptr addrspace(3) %0) {
+  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %0)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  nvvm.mbarrier.inval %barrier : !llvm.ptr<3>
+  llvm.return
+}
+
+llvm.func @mbarrier_arrive(%barrier: !llvm.ptr) {
+  // CHECK-LABEL: define void @mbarrier_arrive(ptr %0) {
+  // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive(ptr %0)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr  -> i64
+  llvm.return
+}
+
+llvm.func @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>) {
+  // CHECK-LABEL: define void @mbarrier_arrive_shared(ptr addrspace(3) %0) {
+  // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %0)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64
+  llvm.return
+}
+
+llvm.func @mbarrier_arrive_nocomplete(%barrier: !llvm.ptr) {
+  // CHECK-LABEL: define void @mbarrier_arrive_nocomplete(ptr %0) {
+  // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %0, i32 %2)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  %count = nvvm.read.ptx.sreg.ntid.x : i32
+  %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr, i32 -> i64
+  llvm.return
+}
+
+llvm.func @mbarrier_arrive_nocomplete_shared(%barrier: !llvm.ptr<3>) {
+  // CHECK-LABEL: define void @mbarrier_arrive_nocomplete_shared(ptr addrspace(3) %0) {
+  // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %0, i32 %2)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  %count = nvvm.read.ptx.sreg.ntid.x : i32
+  %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr<3>, i32  -> i64
+  llvm.return
+}
+
+llvm.func @mbarrier_test_wait(%barrier: !llvm.ptr, %token : i64) -> i1 {
+  // CHECK-LABEL: define i1 @mbarrier_test_wait(ptr %0, i64 %1) {
+  // CHECK-NEXT: %3 = call i1 @llvm.nvvm.mbarrier.test.wait(ptr %0, i64 %1)
+  // CHECK-NEXT: ret i1 %3
+  // CHECK-NEXT: }
+  %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr, i64 -> i1
+  llvm.return %isComplete : i1
+}
+
+llvm.func @mbarrier_test_wait_shared(%barrier: !llvm.ptr<3>, %token : i64) {
+  // CHECK-LABEL: define void @mbarrier_test_wait_shared(ptr addrspace(3) %0, i64 %1) {
+  // CHECK-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  // CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %0, i64 %1)
+  // CHECK-NEXT: ret void
+  // CHECK-NEXT: }
+  %count = nvvm.read.ptx.sreg.ntid.x : i32
+  %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr<3>, i64 -> i1
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 3fc09f371a347..1ec55408e97a5 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -531,19 +531,6 @@ llvm.func @async_cp_zfill(%dst: !llvm.ptr<3>, %src: !llvm.ptr<1>, %cpSize: i32)
   llvm.return
 }
 
-// CHECK-LABEL: @cp_async_mbarrier_arrive
-llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) {
-  // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %{{.*}})
-  nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr
-  // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %{{.*}})
-  nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr
-  // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %{{.*}})
-  nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3>
-  // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %{{.*}})
-  nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3>
-  llvm.return
-}
-
 // CHECK-LABEL: @llvm_nvvm_setmaxregister
 llvm.func @llvm_nvvm_setmaxregister() {
   // CHECK: call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 256)
diff --git a/mlir/test/Transforms/test-legalizer-full.mlir b/mlir/test/Transforms/test-legalizer-full.mlir
index 42cec68b9fbbb..8da9109a32762 100644
--- a/mlir/test/Transforms/test-legalizer-full.mlir
+++ b/mlir/test/Transforms/test-legalizer-full.mlir
@@ -72,3 +72,21 @@ builtin.module {
   }
 
 }
+
+// -----
+
+// The region of "test.post_order_legalization" is converted before the op.
+
+// expected-remark@+1 {{applyFullConversion failed}}
+builtin.module {
+func.func @test_preorder_legalization() {
+  // expected-error@+1 {{failed to legalize operation 'test.post_order_legalization'}}
+  "test.post_order_legalization"() ({
+  ^bb0(%arg0: i64):
+    // Not-explicitly-legal ops are not allowed to survive.
+    "test.remaining_consumer"(%arg0) : (i64) -> ()
+    "test.invalid"(%arg0) : (i64) -> ()
+  }) : () -> ()
+  return
+}
+}
diff --git a/mlir/test/Transforms/test-legalizer-rollback.mlir b/mlir/test/Transforms/test-legalizer-rollback.mlir
index 71e11782e14b0..4bcca6b7e5228 100644
--- a/mlir/test/Transforms/test-legalizer-rollback.mlir
+++ b/mlir/test/Transforms/test-legalizer-rollback.mlir
@@ -163,3 +163,22 @@ func.func @create_unregistered_op_in_pattern() -> i32 {
   "test.return"(%0) : (i32) -> ()
 }
 }
+
+// -----
+
+// CHECK-LABEL: func @test_failed_preorder_legalization
+//       CHECK:   "test.post_order_legalization"() ({
+//       CHECK:     %[[r:.*]] = "test.illegal_op_g"() : () -> i32
+//       CHECK:     "test.return"(%[[r]]) : (i32) -> ()
+//       CHECK:   }) : () -> ()
+// expected-remark @+1 {{applyPartialConversion failed}}
+module {
+func.func @test_failed_preorder_legalization() {
+  // expected-error @+1 {{failed to legalize operation 'test.post_order_legalization' that was explicitly marked illegal}}
+  "test.post_order_legalization"() ({
+    %0 = "test.illegal_op_g"() : () -> (i32)
+    "test.return"(%0) : (i32) -> ()
+  }) : () -> ()
+  return
+}
+}
diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir
index 7c43bb7bface0..88a71cc26ab0c 100644
--- a/mlir/test/Transforms/test-legalizer.mlir
+++ b/mlir/test/Transforms/test-legalizer.mlir
@@ -448,3 +448,35 @@ func.func @test_working_1to1_pattern(%arg0: f16) {
   "test.type_consumer"(%arg0) : (f16) -> ()
   "test.return"() : () -> ()
 }
+
+// -----
+
+// The region of "test.post_order_legalization" is converted before the op.
+
+// CHECK: notifyBlockInserted into test.post_order_legalization: was unlinked
+// CHECK: notifyOperationInserted: test.invalid
+// CHECK: notifyBlockErased
+// CHECK: notifyOperationInserted: test.valid, was unlinked
+// CHECK: notifyOperationReplaced: test.invalid
+// CHECK: notifyOperationErased: test.invalid
+// CHECK: notifyOperationModified: test.post_order_legalization
+
+// CHECK-LABEL: func @test_preorder_legalization
+//       CHECK:   "test.post_order_legalization"() ({
+//       CHECK:   ^{{.*}}(%[[arg0:.*]]: f64):
+// Note: The survival of a not-explicitly-invalid operation does *not* cause
+// a conversion failure in when applying a partial conversion.
+//       CHECK:     %[[cast:.*]] = "test.cast"(%[[arg0]]) : (f64) -> i64
+//       CHECK:     "test.remaining_consumer"(%[[cast]]) : (i64) -> ()
+//       CHECK:     "test.valid"(%[[arg0]]) : (f64) -> ()
+//       CHECK:   }) {is_legal} : () -> ()
+func.func @test_preorder_legalization() {
+  "test.post_order_legalization"() ({
+  ^bb0(%arg0: i64):
+    // expected-remark @+1 {{'test.remaining_consumer' is not legalizable}}
+    "test.remaining_consumer"(%arg0) : (i64) -> ()
+    "test.invalid"(%arg0) : (i64) -> ()
+  }) : () -> ()
+  // expected-remark @+1 {{'func.return' is not legalizable}}
+  return
+}
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index 12edecc113495..9b64bc691588d 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -1418,6 +1418,22 @@ class TestTypeConsumerOpPattern
   }
 };
 
+class TestPostOrderLegalization : public ConversionPattern {
+public:
+  TestPostOrderLegalization(MLIRContext *ctx, const TypeConverter &converter)
+      : ConversionPattern(converter, "test.post_order_legalization", 1, ctx) {}
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<ValueRange> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    for (Region &r : op->getRegions())
+      if (failed(rewriter.legalize(&r)))
+        return failure();
+    rewriter.modifyOpInPlace(
+        op, [&]() { op->setAttr("is_legal", rewriter.getUnitAttr()); });
+    return success();
+  }
+};
+
 /// Test unambiguous overload resolution of replaceOpWithMultiple. This
 /// function is just to trigger compiler errors. It is never executed.
 [[maybe_unused]] void testReplaceOpWithMultipleOverloads(
@@ -1532,7 +1548,8 @@ struct TestLegalizePatternDriver
     patterns.add<TestDropOpSignatureConversion, TestDropAndReplaceInvalidOp,
                  TestPassthroughInvalidOp, TestMultiple1ToNReplacement,
                  TestValueReplace, TestReplaceWithValidConsumer,
-                 TestTypeConsumerOpPattern>(&getContext(), converter);
+                 TestTypeConsumerOpPattern, TestPostOrderLegalization>(
+        &getContext(), converter);
     patterns.add<TestConvertBlockArgs>(converter, &getContext());
     mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns,
                                                               converter);
@@ -1560,6 +1577,9 @@ struct TestLegalizePatternDriver
     target.addDynamicallyLegalOp(
         OperationName("test.value_replace", &getContext()),
         [](Operation *op) { return op->hasAttr("is_legal"); });
+    target.addDynamicallyLegalOp(
+        OperationName("test.post_order_legalization", &getContext()),
+        [](Operation *op) { return op->hasAttr("is_legal"); });
 
     // TestCreateUnregisteredOp creates `arith.constant` operation,
     // which was not added to target intentionally to test
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 0888ebd7a9362..cb08397c201f2 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -927,9 +927,24 @@ static void runIslScheduleOptimizer(
     walkScheduleTreeForStatistics(Schedule, 2);
   }
 
+  // Check for why any computation could have failed
   if (MaxOpGuard.hasQuotaExceeded()) {
     POLLY_DEBUG(dbgs() << "Schedule optimizer calculation exceeds ISL quota\n");
     return;
+  } else if (isl_ctx_last_error(Ctx) != isl_error_none) {
+    const char *File = isl_ctx_last_error_file(Ctx);
+    int Line = isl_ctx_last_error_line(Ctx);
+    const char *Msg = isl_ctx_last_error_msg(Ctx);
+    POLLY_DEBUG(
+        dbgs()
+        << "ISL reported an error during the computation of a new schedule at "
+        << File << ":" << Line << ": " << Msg);
+    isl_ctx_reset_error(Ctx);
+    return;
+  } else if (Schedule.is_null()) {
+    POLLY_DEBUG(dbgs() << "Schedule optimizer did not compute a new schedule "
+                          "for unknown reasons\n");
+    return;
   }
 
   // Skip profitability check if user transformation(s) have been applied.
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 5a1e0b53b021c..8d225d63cdf3e 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2964,6 +2964,22 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_exp2m1f16",
+    hdrs = ["src/__support/math/exp2m1f16.h"],
+    deps = [
+        ":__support_fputil_except_value_utils",
+        ":__support_fputil_fma",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_nearest_integer",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_rounding_mode",
+        ":__support_macros_optimization",
+        ":__support_math_common_constants",
+        ":__support_math_expxf16_utils",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_exp10",
     hdrs = ["src/__support/math/exp10.h"],
@@ -3762,7 +3778,7 @@ libc_math_function(
 libc_math_function(
     name = "exp2m1f16",
     additional_deps = [
-        ":__support_math_expxf16_utils",
+        ":__support_math_exp2m1f16",
     ],
 )