From 908c3542e39ff0a63c4abdc80aa4c14fe26168db Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Mon, 15 Apr 2024 23:44:53 -0400
Subject: [PATCH 01/58] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?=
 =?UTF-8?q?initial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 llvm/lib/Target/Sparc/SparcInstrInfo.td     | 30 +++++++++----------
 llvm/lib/Target/X86/X86InstrCMovSetCC.td    |  4 +--
 llvm/test/TableGen/def-multiple-operands.td | 33 +++++++++++++++++++++
 llvm/utils/TableGen/InstrInfoEmitter.cpp    |  8 ++++-
 4 files changed, 57 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/TableGen/def-multiple-operands.td

diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e792427cca28..4d68f93efeac1 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -693,38 +693,38 @@ let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in {
 }
 
 let rd = 0 in {
-  let Defs = [CPSR] in {
-    def STCSRrr : F3_1<3, 0b110101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [CPSR] in {
+    def STCSRrr : F3_1<3, 0b110101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
                        "st %csr, [$addr]", [], IIC_st>;
-    def STCSRri : F3_2<3, 0b110101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STCSRri : F3_2<3, 0b110101, (outs), (ins (MEMri $rs1, $simm13):$addr),
                        "st %csr, [$addr]", [], IIC_st>;
   }
-  let Defs = [CPQ] in {
-    def STDCQrr : F3_1<3, 0b110110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [CPQ] in {
+    def STDCQrr : F3_1<3, 0b110110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
                        "std %cq, [$addr]", [], IIC_std>;
-    def STDCQri : F3_2<3, 0b110110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STDCQri : F3_2<3, 0b110110, (outs), (ins (MEMri $rs1, $simm13):$addr),
                        "std %cq, [$addr]", [], IIC_std>;
   }
 }
 
 let rd = 0 in {
-  let Defs = [FSR] in {
-    def STFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [FSR] in {
+    def STFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		   "st %fsr, [$addr]", [], IIC_st>;
-    def STFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		   "st %fsr, [$addr]", [], IIC_st>;
   }
-  let Defs = [FQ] in {
-    def STDFQrr : F3_1<3, 0b100110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Defs = [FQ] in {
+    def STDFQrr : F3_1<3, 0b100110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		   "std %fq, [$addr]", [], IIC_std>;
-    def STDFQri : F3_2<3, 0b100110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STDFQri : F3_2<3, 0b100110, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		   "std %fq, [$addr]", [], IIC_std>;
   }
 }
-let rd = 1, Defs = [FSR] in {
-  def STXFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+let rd = 1, mayStore = 1, Uses = [FSR] in {
+  def STXFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		 "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
-  def STXFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+  def STXFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		 "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 27a0c889a4da3..e27aa4115990e 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -58,8 +58,8 @@ let SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
 }
 let SchedRW = [WriteCMOV, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault],
     Predicates = [HasCMOV, HasCF, In64BitMode], mayStore = 1 in
-  def mr : ITy<0x40, MRMDestMemCC, t, (outs t.MemOperand:$dst),
-                (ins t.RegClass:$src1, ccode:$cond),
+  def mr : ITy<0x40, MRMDestMemCC, t, (outs),
+                (ins t.MemOperand:$dst, t.RegClass:$src1, ccode:$cond),
                 "cfcmov${cond}", unaryop_ndd_args, []>, UseEFLAGS, NF;
 }
 
diff --git a/llvm/test/TableGen/def-multiple-operands.td b/llvm/test/TableGen/def-multiple-operands.td
new file mode 100644
index 0000000000000..dbee7cfacdf7d
--- /dev/null
+++ b/llvm/test/TableGen/def-multiple-operands.td
@@ -0,0 +1,33 @@
+// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo {}
+
+def arch : Target {
+  let InstructionSet = archInstrInfo;
+}
+
+def R0 : Register<"r0">;
+def P0 : Register<"p0">;
+def R32 : RegisterClass<"MyNS", [i32], 0, (add R0)>;
+def P1 : RegisterClass<"MyNS", [i1], 0, (add P0)>;
+
+def Reg3Opnd : Operand<OtherVT> {
+  let MIOperandInfo = (ops R32, R32, P1);
+}
+
+// CHECK: archInstrTable {{.* = \{}}
+// CHECK: {{\{}}
+// CHECK: {{\{}} [[ID:[0-9]+]], 4, 3, 13, {{.+\}, \/\/}}
+// CHECK-SAME: Inst #[[ID]] = InstA
+def InstA : Instruction {
+  let Namespace = "MyNS";
+  let Size = 13;
+  // InstA should have 3 defs out of 4 operands.
+  let OutOperandList = (outs Reg3Opnd:$dst);
+  let InOperandList = (ins i32imm:$c);
+  field bits<8> Inst;
+  field bits<8> SoftFail = 0;
+  let hasSideEffects = false;
+}
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 36f8fa1465393..b3a05e081f637 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -1181,9 +1181,15 @@ void InstrInfoEmitter::emitRecord(
     // Each logical operand can be multiple MI operands.
     MinOperands =
         Inst.Operands.back().MIOperandNo + Inst.Operands.back().MINumOperands;
+  // Even the logical output operand may be multiple MI operands.
+  int DefOperands = 0;
+  if (Inst.Operands.NumDefs) {
+    auto &Opnd = Inst.Operands[Inst.Operands.NumDefs - 1];
+    DefOperands = Opnd.MIOperandNo + Opnd.MINumOperands;
+  }
 
   OS << "    { ";
-  OS << Num << ",\t" << MinOperands << ",\t" << Inst.Operands.NumDefs << ",\t"
+  OS << Num << ",\t" << MinOperands << ",\t" << DefOperands << ",\t"
      << Inst.TheDef->getValueAsInt("Size") << ",\t"
      << SchedModels.getSchedClassIdx(Inst) << ",\t";
 

From 5a34ff12b8f4a73f5dcd4be1b2575dc38cf13bee Mon Sep 17 00:00:00 2001
From: Jeremy Kun <jkun@google.com>
Date: Tue, 16 Apr 2024 07:35:36 -0700
Subject: [PATCH 02/58] fix Polynomial.td doc filename (#88900)

Not sure how best to test this, but I think it fixes the error in this CI run:
https://github.com/llvm/mlir-www/actions/runs/8699908058/job/23859264085#step:7:1111

Co-authored-by: Jeremy Kun <j2kun@users.noreply.github.com>
Co-authored-by: Jacques Pienaar <jpienaar@google.com>
---
 mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt
index d8039deb5ee21..dd0384d8b79d6 100644
--- a/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt
@@ -1,8 +1,8 @@
 add_mlir_dialect(Polynomial polynomial)
-add_mlir_doc(PolynomialDialect PolynomialDialect Polynomial/ -gen-dialect-doc)
-add_mlir_doc(PolynomialOps PolynomialOps Polynomial/ -gen-op-doc)
-add_mlir_doc(PolynomialAttributes PolynomialAttributes Dialects/ -gen-attrdef-doc)
-add_mlir_doc(PolynomialTypes PolynomialTypes Dialects/ -gen-typedef-doc)
+add_mlir_doc(Polynomial PolynomialDialect Polynomial/ -gen-dialect-doc)
+add_mlir_doc(Polynomial PolynomialOps Polynomial/ -gen-op-doc)
+add_mlir_doc(Polynomial PolynomialAttributes Dialects/ -gen-attrdef-doc)
+add_mlir_doc(Polynomial PolynomialTypes Dialects/ -gen-typedef-doc)
 
 set(LLVM_TARGET_DEFINITIONS Polynomial.td)
 mlir_tablegen(PolynomialAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=polynomial)

From b63247627c9e87e898dec5bf0bea255b3f0eec5c Mon Sep 17 00:00:00 2001
From: Ding Fei <fding@feysh.com>
Date: Tue, 16 Apr 2024 22:38:27 +0800
Subject: [PATCH 03/58] [AST][RecoveryExpr] Fix a crash on c89/c90 invalid
 InitListExpr (#88008) (#88014)

Use the refactored `CheckForConstantInitializer()` to skip checking
expressions that contain errors.

---------

Co-authored-by: Aaron Ballman <aaron@aaronballman.com>
---
 clang/docs/ReleaseNotes.rst                   |  2 ++
 clang/include/clang/Sema/Sema.h               |  4 ++-
 clang/lib/Sema/SemaDecl.cpp                   | 28 ++++++++-----------
 clang/lib/Sema/SemaExpr.cpp                   |  2 +-
 .../test/Sema/recover-expr-gh88008-nocrash.c  | 11 ++++++++
 5 files changed, 28 insertions(+), 19 deletions(-)
 create mode 100644 clang/test/Sema/recover-expr-gh88008-nocrash.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index db90db6fa4ab0..d8ec8bcb8df53 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -417,6 +417,8 @@ Bug Fixes in This Version
 - Fixed a regression in CTAD that a friend declaration that befriends itself may cause
   incorrect constraint substitution. (#GH86769).
 
+- Fixed an assertion failure on invalid InitListExpr in C89 mode (#GH88008).
+
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a5fe83a539aaf..77150a318ee47 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -55,6 +55,7 @@
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/SemaBase.h"
 #include "clang/Sema/SemaConcept.h"
+#include "clang/Sema/SemaDiagnostic.h"
 #include "clang/Sema/TypoCorrection.h"
 #include "clang/Sema/Weak.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -3427,7 +3428,8 @@ class Sema final : public SemaBase {
       bool ConstexprSupported, bool CLinkageMayDiffer);
 
   /// type checking declaration initializers (C99 6.7.8)
-  bool CheckForConstantInitializer(Expr *e, QualType t);
+  bool CheckForConstantInitializer(
+      Expr *Init, unsigned DiagID = diag::err_init_element_not_constant);
 
   QualType deduceVarTypeFromInitializer(VarDecl *VDecl, DeclarationName Name,
                                         QualType Type, TypeSourceInfo *TSI,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 390da508518e1..745cf41e204e7 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12671,7 +12671,7 @@ void Sema::CheckMSVCRTEntryPoint(FunctionDecl *FD) {
   }
 }
 
-bool Sema::CheckForConstantInitializer(Expr *Init, QualType DclT) {
+bool Sema::CheckForConstantInitializer(Expr *Init, unsigned DiagID) {
   // FIXME: Need strict checking.  In C89, we need to check for
   // any assignment, increment, decrement, function-calls, or
   // commas outside of a sizeof.  In C99, it's the same list,
@@ -12689,8 +12689,7 @@ bool Sema::CheckForConstantInitializer(Expr *Init, QualType DclT) {
   const Expr *Culprit;
   if (Init->isConstantInitializer(Context, false, &Culprit))
     return false;
-  Diag(Culprit->getExprLoc(), diag::err_init_element_not_constant)
-    << Culprit->getSourceRange();
+  Diag(Culprit->getExprLoc(), DiagID) << Culprit->getSourceRange();
   return true;
 }
 
@@ -13808,29 +13807,24 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
     // OpenCL v1.2 s6.5.3: __constant locals must be constant-initialized.
     // This is true even in C++ for OpenCL.
     } else if (VDecl->getType().getAddressSpace() == LangAS::opencl_constant) {
-      CheckForConstantInitializer(Init, DclT);
+      CheckForConstantInitializer(Init);
 
-    // Otherwise, C++ does not restrict the initializer.
+      // Otherwise, C++ does not restrict the initializer.
     } else if (getLangOpts().CPlusPlus) {
       // do nothing
 
     // C99 6.7.8p4: All the expressions in an initializer for an object that has
     // static storage duration shall be constant expressions or string literals.
     } else if (VDecl->getStorageClass() == SC_Static) {
-      CheckForConstantInitializer(Init, DclT);
+      CheckForConstantInitializer(Init);
 
-    // C89 is stricter than C99 for aggregate initializers.
-    // C89 6.5.7p3: All the expressions [...] in an initializer list
-    // for an object that has aggregate or union type shall be
-    // constant expressions.
+      // C89 is stricter than C99 for aggregate initializers.
+      // C89 6.5.7p3: All the expressions [...] in an initializer list
+      // for an object that has aggregate or union type shall be
+      // constant expressions.
     } else if (!getLangOpts().C99 && VDecl->getType()->isAggregateType() &&
                isa<InitListExpr>(Init)) {
-      const Expr *Culprit;
-      if (!Init->isConstantInitializer(Context, false, &Culprit)) {
-        Diag(Culprit->getExprLoc(),
-             diag::ext_aggregate_init_not_constant)
-          << Culprit->getSourceRange();
-      }
+      CheckForConstantInitializer(Init, diag::ext_aggregate_init_not_constant);
     }
 
     if (auto *E = dyn_cast<ExprWithCleanups>(Init))
@@ -13963,7 +13957,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
     // Avoid duplicate diagnostics for constexpr variables.
     if (!getLangOpts().CPlusPlus && !VDecl->isInvalidDecl() &&
         !VDecl->isConstexpr())
-      CheckForConstantInitializer(Init, DclT);
+      CheckForConstantInitializer(Init);
   }
 
   QualType InitType = Init->getType();
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cabffa47c9318..7c3faba0f7881 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7331,7 +7331,7 @@ Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo,
     if (!LiteralExpr->isTypeDependent() &&
         !LiteralExpr->isValueDependent() &&
         !literalType->isDependentType()) // C99 6.5.2.5p3
-      if (CheckForConstantInitializer(LiteralExpr, literalType))
+      if (CheckForConstantInitializer(LiteralExpr))
         return ExprError();
   } else if (literalType.getAddressSpace() != LangAS::opencl_private &&
              literalType.getAddressSpace() != LangAS::Default) {
diff --git a/clang/test/Sema/recover-expr-gh88008-nocrash.c b/clang/test/Sema/recover-expr-gh88008-nocrash.c
new file mode 100644
index 0000000000000..5500b33dd0e85
--- /dev/null
+++ b/clang/test/Sema/recover-expr-gh88008-nocrash.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only -std=c90
+
+struct S {
+  int v;
+};
+
+struct T; // expected-note {{forward declaration of 'struct T'}}
+
+void gh88008_nocrash(struct T *t) {
+  struct S s = { .v = t->y }; // expected-error {{incomplete definition of type 'struct T'}}
+}

From d2d4a1bbdc455a30d600743eb59fb1c69205967a Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy@outlook.com>
Date: Tue, 16 Apr 2024 22:52:19 +0800
Subject: [PATCH 04/58] Revert "[JumpThreading] Thread over BB with only an
 unconditional branch" (#88907)

Reverts llvm/llvm-project#86312
---
 llvm/lib/Transforms/Utils/Local.cpp           |  10 +-
 llvm/test/CodeGen/AArch64/and-sink.ll         |   9 +-
 .../AArch64/combine-comparisons-by-cse.ll     | 122 ++++++++++-------
 llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll   |  18 ++-
 llvm/test/Transforms/JumpThreading/pr79175.ll |   8 +-
 llvm/test/Transforms/JumpThreading/select.ll  |  50 ++++---
 .../Transforms/JumpThreading/thread-prob-7.ll |   8 +-
 .../Transforms/JumpThreading/uncond-no-phi.ll | 123 ------------------
 .../PhaseOrdering/thread-uncond-bb.ll         |  62 ---------
 9 files changed, 126 insertions(+), 284 deletions(-)
 delete mode 100644 llvm/test/Transforms/JumpThreading/uncond-no-phi.ll
 delete mode 100644 llvm/test/Transforms/PhaseOrdering/thread-uncond-bb.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index baec51a07fcbf..a42ef0c4e6ae9 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1019,14 +1019,12 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
                                 const SmallPtrSetImpl<BasicBlock *> &SuccPreds,
                                 BasicBlock *&CommonPred) {
 
-  // When Succ has no phis, BB may be merged into Succ directly. We don't need
-  // to redirect the predecessors of BB in this case.
-  if (Succ->phis().empty())
+  // There must be phis in BB, otherwise BB will be merged into Succ directly
+  if (BB->phis().empty() || Succ->phis().empty())
     return false;
 
-  // BB must have multiple different predecessors, so that at least one of
-  // predecessors can be redirected to Succ, except the common predecessor.
-  if (BB->getUniquePredecessor() || pred_empty(BB))
+  // BB must have predecessors not shared that can be redirected to Succ
+  if (!BB->hasNPredecessorsOrMore(2))
     return false;
 
   // Get single common predecessors of both BB and Succ
diff --git a/llvm/test/CodeGen/AArch64/and-sink.ll b/llvm/test/CodeGen/AArch64/and-sink.ll
index a57e9d54f3078..f298a55dab721 100644
--- a/llvm/test/CodeGen/AArch64/and-sink.ll
+++ b/llvm/test/CodeGen/AArch64/and-sink.ll
@@ -11,14 +11,15 @@
 define dso_local i32 @and_sink1(i32 %a, i1 %c) {
 ; CHECK-LABEL: and_sink1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w1, #0, .LBB0_2
+; CHECK-NEXT:    tbz w1, #0, .LBB0_3
 ; CHECK-NEXT:  // %bb.1: // %bb0
-; CHECK-NEXT:    tst w0, #0x4
 ; CHECK-NEXT:    adrp x8, A
-; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    str wzr, [x8, :lo12:A]
+; CHECK-NEXT:    tbnz w0, #2, .LBB0_3
+; CHECK-NEXT:  // %bb.2:
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:  .LBB0_3: // %bb2
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index dde3e81833a63..6449c3e11d667 100644
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -13,10 +13,10 @@ define i32 @combine_gt_ge_10() #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT:    ldr w9, [x8]
+; CHECK-NEXT:    ldr w8, [x8]
+; CHECK-NEXT:    cmp w8, #10
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
-; CHECK-NEXT:    cmp w9, #10
 ; CHECK-NEXT:    b.le .LBB0_3
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x9, :got:c
@@ -29,17 +29,18 @@ define i32 @combine_gt_ge_10() #0 {
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_3: // %lor.lhs.false
-; CHECK-NEXT:    cmp w9, #10
-; CHECK-NEXT:    b.lt .LBB0_5
+; CHECK-NEXT:    b.lt .LBB0_6
 ; CHECK-NEXT:  .LBB0_4: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    b.ne .LBB0_6
+; CHECK-NEXT:  // %bb.5:
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_5:
+; CHECK-NEXT:  .LBB0_6: // %if.end
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -144,10 +145,10 @@ define i32 @combine_lt_ge_5() #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT:    ldr w9, [x8]
+; CHECK-NEXT:    ldr w8, [x8]
+; CHECK-NEXT:    cmp w8, #5
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
-; CHECK-NEXT:    cmp w9, #5
 ; CHECK-NEXT:    b.ge .LBB2_3
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    adrp x9, :got:c
@@ -160,17 +161,18 @@ define i32 @combine_lt_ge_5() #0 {
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB2_3: // %lor.lhs.false
-; CHECK-NEXT:    cmp w9, #5
-; CHECK-NEXT:    b.gt .LBB2_5
+; CHECK-NEXT:    b.gt .LBB2_6
 ; CHECK-NEXT:  .LBB2_4: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    b.ne .LBB2_6
+; CHECK-NEXT:  // %bb.5:
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB2_5:
+; CHECK-NEXT:  .LBB2_6: // %if.end
 ; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
@@ -497,17 +499,24 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
 ; CHECK-NEXT:  // %bb.3: // %while.cond.while.end_crit_edge
 ; CHECK-NEXT:    ldr w8, [x19]
 ; CHECK-NEXT:  .LBB7_4: // %while.end
-; CHECK-NEXT:    adrp x9, :got:b
-; CHECK-NEXT:    adrp x10, :got:d
-; CHECK-NEXT:    ldr x9, [x9, :got_lo12:b]
-; CHECK-NEXT:    ldr x10, [x10, :got_lo12:d]
-; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    b.gt .LBB7_7
+; CHECK-NEXT:  // %bb.5: // %land.lhs.true
+; CHECK-NEXT:    adrp x8, :got:b
+; CHECK-NEXT:    adrp x9, :got:d
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
-; CHECK-NEXT:    ldr w10, [x10]
-; CHECK-NEXT:    cmp w9, w10
-; CHECK-NEXT:    ccmp w8, #2, #0, eq
-; CHECK-NEXT:    mov w8, #123 // =0x7b
-; CHECK-NEXT:    csel w0, w8, wzr, lt
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    b.ne .LBB7_7
+; CHECK-NEXT:  // %bb.6:
+; CHECK-NEXT:    mov w0, #123 // =0x7b
+; CHECK-NEXT:    b .LBB7_8
+; CHECK-NEXT:  .LBB7_7: // %if.end
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:  .LBB7_8: // %return
+; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w19
@@ -555,42 +564,52 @@ return:                                           ; preds = %if.end, %land.lhs.t
 define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
 ; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    b.gt .LBB8_4
+; CHECK-NEXT:    b.gt .LBB8_3
 ; CHECK-NEXT:  // %bb.1: // %while.body.preheader
-; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    sub w19, w8, #1
 ; CHECK-NEXT:  .LBB8_2: // %while.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    bl do_something
 ; CHECK-NEXT:    adds w19, w19, #1
 ; CHECK-NEXT:    b.mi .LBB8_2
-; CHECK-NEXT:  // %bb.3:
-; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    .cfi_restore w19
-; CHECK-NEXT:    .cfi_restore w30
-; CHECK-NEXT:  .LBB8_4: // %while.end
+; CHECK-NEXT:  .LBB8_3: // %while.end
+; CHECK-NEXT:    adrp x8, :got:c
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:c]
+; CHECK-NEXT:    ldr w8, [x8]
+; CHECK-NEXT:    cmn w8, #2
+; CHECK-NEXT:    b.lt .LBB8_6
+; CHECK-NEXT:  // %bb.4: // %land.lhs.true
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
-; CHECK-NEXT:    adrp x10, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
 ; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
-; CHECK-NEXT:    ldr x10, [x10, :got_lo12:c]
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
-; CHECK-NEXT:    ldr w10, [x10]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    mov w8, #-3 // =0xfffffffd
-; CHECK-NEXT:    ccmp w10, w8, #4, eq
-; CHECK-NEXT:    mov w8, #123 // =0x7b
-; CHECK-NEXT:    csel w0, w8, wzr, gt
+; CHECK-NEXT:    b.ne .LBB8_6
+; CHECK-NEXT:  // %bb.5:
+; CHECK-NEXT:    mov w0, #123 // =0x7b
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w19
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB8_6: // %if.end
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w19
+; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @a, align 4
@@ -763,14 +782,12 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    csel x9, x0, xzr, gt
 ; CHECK-NEXT:    str x9, [x1]
-; CHECK-NEXT:    b.le .LBB11_3
+; CHECK-NEXT:    b.le .LBB11_2
 ; CHECK-NEXT:  // %bb.1: // %lor.lhs.false
 ; CHECK-NEXT:    cmp w8, #2
-; CHECK-NEXT:    b.ge .LBB11_5
-; CHECK-NEXT:  // %bb.2:
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB11_3: // %land.lhs.true
+; CHECK-NEXT:    b.ge .LBB11_4
+; CHECK-NEXT:    b .LBB11_6
+; CHECK-NEXT:  .LBB11_2: // %land.lhs.true
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:c
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
@@ -778,11 +795,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne .LBB11_5
-; CHECK-NEXT:  // %bb.4:
+; CHECK-NEXT:    b.ne .LBB11_4
+; CHECK-NEXT:  // %bb.3:
 ; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB11_5: // %land.lhs.true3
+; CHECK-NEXT:  .LBB11_4: // %land.lhs.true3
 ; CHECK-NEXT:    adrp x8, :got:b
 ; CHECK-NEXT:    adrp x9, :got:d
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
@@ -790,7 +807,12 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
 ; CHECK-NEXT:    ldr w8, [x8]
 ; CHECK-NEXT:    ldr w9, [x9]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    b.ne .LBB11_6
+; CHECK-NEXT:  // %bb.5:
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB11_6: // %if.end
+; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @a, align 4
diff --git a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
index c33c81841be65..dddc4bd953d7a 100644
--- a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
@@ -10,13 +10,12 @@ define i32 @fred(ptr %a0) #0 {
 ; CHECK-LABEL: fred:
 ; CHECK:       // %bb.0: // %b0
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r1:0 = combine(r0,#0)
-; CHECK-NEXT:     if (p0) jumpr r31
+; CHECK-NEXT:     if (p0) jump:nt .LBB0_2
 ; CHECK-NEXT:    }
-; CHECK-NEXT:  .LBB0_1: // %b2
+; CHECK-NEXT:  // %bb.1: // %b2
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     r3:2 = combine(#0,#0)
-; CHECK-NEXT:     r1:0 = memd(r1+#0)
+; CHECK-NEXT:     r1:0 = memd(r0+#0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     p0 = vcmph.eq(r1:0,r3:2)
@@ -28,7 +27,16 @@ define i32 @fred(ptr %a0) #0 {
 ; CHECK-NEXT:     r0 = and(r0,#1)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = !cmp.eq(r0,#11)
+; CHECK-NEXT:     p0 = cmp.eq(r0,#11)
+; CHECK-NEXT:     r0 = #1
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     if (p0) r0 = #0
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+; CHECK-NEXT:  .LBB0_2: // %b14
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = #0
 ; CHECK-NEXT:     jumpr r31
 ; CHECK-NEXT:    }
 b0:
diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll
index cce30ce079999..2c7ee0770cdc7 100644
--- a/llvm/test/Transforms/JumpThreading/pr79175.ll
+++ b/llvm/test/Transforms/JumpThreading/pr79175.ll
@@ -17,11 +17,11 @@ define i32 @test(i64 %idx, i32 %val) {
 ; CHECK:       cond.end:
 ; CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0
 ; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
-; CHECK-NEXT:    br i1 [[COND_FR]], label [[TMP0:%.*]], label [[COND_END_THREAD]]
-; CHECK:       0:
-; CHECK-NEXT:    br label [[COND_END_THREAD]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
 ; CHECK:       cond.end.thread:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[VAL]], [[COND_END]] ], [ 0, [[TMP0]] ], [ 0, [[FOR_BODY]] ]
+; CHECK-NEXT:    br label [[TMP0]]
+; CHECK:       0:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
 ; CHECK-NEXT:    [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr [[F_IDX]], align 4
 ; CHECK-NEXT:    [[F_RELOAD:%.*]] = load i32, ptr @f, align 4
diff --git a/llvm/test/Transforms/JumpThreading/select.ll b/llvm/test/Transforms/JumpThreading/select.ll
index 27ebf4c25da50..4ec55a66bb8ac 100644
--- a/llvm/test/Transforms/JumpThreading/select.ll
+++ b/llvm/test/Transforms/JumpThreading/select.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -passes="jump-threading" -debug-only=branch-prob < %s 2>&1 | FileCheck %s
-; RUN: opt -S -passes="require<branch-prob>,jump-threading" -debug-only=branch-prob  -disable-output < %s 2>&1 | FileCheck -check-prefix=CHECK-BPI %s
+; RUN: opt -S -passes="require<branch-prob>,jump-threading" -debug-only=branch-prob < %s 2>&1 | FileCheck -check-prefixes=CHECK,CHECK-BPI %s
 ; REQUIRES: asserts
 
 ; CHECK-BPI-LABEL:  ---- Branch Probability Info : unfold1 ----
@@ -21,7 +21,7 @@ declare void @quux()
 ; booleans where at least one operand is true/false/undef.
 
 ;.
-; CHECK: @anchor = constant [3 x ptr] [ptr blockaddress(@test_indirectbr, %L1), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@test_indirectbr, %L3)]
+; CHECK: @[[ANCHOR:[a-zA-Z0-9_$"\\.-]+]] = constant [3 x ptr] [ptr blockaddress(@test_indirectbr, [[L1:%.*]]), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@test_indirectbr, [[L3:%.*]])]
 ;.
 define void @test_br(i1 %cond, i1 %value) nounwind {
 ; CHECK-LABEL: @test_br(
@@ -66,8 +66,8 @@ define void @test_switch(i1 %cond, i8 %value) nounwind {
 ; CHECK-NEXT:    call void @quux()
 ; CHECK-NEXT:    [[EXPR:%.*]] = select i1 [[COND]], i8 1, i8 [[VALUE:%.*]]
 ; CHECK-NEXT:    switch i8 [[EXPR]], label [[L3:%.*]] [
-; CHECK-NEXT:      i8 1, label [[L1]]
-; CHECK-NEXT:      i8 2, label [[L2:%.*]]
+; CHECK-NEXT:    i8 1, label [[L1]]
+; CHECK-NEXT:    i8 2, label [[L2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       L1:
 ; CHECK-NEXT:    call void @foo()
@@ -192,8 +192,8 @@ define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind {
 ; CHECK:       0:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ]
 ; CHECK-NEXT:    switch i8 [[TMP1]], label [[L3:%.*]] [
-; CHECK-NEXT:      i8 1, label [[L1]]
-; CHECK-NEXT:      i8 2, label [[L2:%.*]]
+; CHECK-NEXT:    i8 1, label [[L1]]
+; CHECK-NEXT:    i8 2, label [[L2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       L1:
 ; CHECK-NEXT:    call void @foo()
@@ -237,8 +237,8 @@ define void @test_switch_default(ptr nocapture %status) nounwind {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[STATUS:%.*]], align 4
 ; CHECK-NEXT:    switch i32 [[TMP0]], label [[L2:%.*]] [
-; CHECK-NEXT:      i32 5061, label [[L2_THREAD:%.*]]
-; CHECK-NEXT:      i32 0, label [[L2]]
+; CHECK-NEXT:    i32 5061, label [[L2_THREAD:%.*]]
+; CHECK-NEXT:    i32 0, label [[L2]]
 ; CHECK-NEXT:    ]
 ; CHECK:       L2.thread:
 ; CHECK-NEXT:    store i32 10025, ptr [[STATUS]], align 4
@@ -377,21 +377,21 @@ define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) noun
 ; CHECK-NEXT:    br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]]
 ; CHECK:       cond.false.i:
 ; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
-; CHECK-NEXT:    br i1 [[CMP4_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_6_I:%.*]]
+; CHECK-NEXT:    br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]]
 ; CHECK:       cond.false.6.i:
 ; CHECK-NEXT:    [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP8_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]]
 ; CHECK:       cond.false.10.i:
 ; CHECK-NEXT:    [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
-; CHECK-NEXT:    br i1 [[CMP13_I]], label [[DOTEXIT_THREAD4]], label [[DOTEXIT:%.*]]
+; CHECK-NEXT:    br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]]
 ; CHECK:       .exit:
 ; CHECK-NEXT:    [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
 ; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[PHITMP]]
-; CHECK-NEXT:    br i1 [[COND_FR]], label [[DOTEXIT_THREAD:%.*]], label [[DOTEXIT_THREAD4]]
-; CHECK:       0:
-; CHECK-NEXT:    br label [[DOTEXIT_THREAD4]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]]
 ; CHECK:       .exit.thread:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[ADD3]], [[DOTEXIT]] ], [ [[J]], [[DOTEXIT_THREAD]] ], [ [[J]], [[COND_FALSE_I]] ], [ [[J]], [[COND_FALSE_10_I]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ]
+; CHECK-NEXT:    br label [[DOTEXIT_THREAD4]]
+; CHECK:       .exit.thread4:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ]
 ; CHECK-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
@@ -430,23 +430,23 @@ define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) noun
 ; CHECK-NEXT:    br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]]
 ; CHECK:       cond.false.i:
 ; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]]
-; CHECK-NEXT:    br i1 [[CMP4_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_6_I:%.*]]
+; CHECK-NEXT:    br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]]
 ; CHECK:       cond.false.6.i:
 ; CHECK-NEXT:    [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]]
 ; CHECK:       cond.false.10.i:
 ; CHECK-NEXT:    [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]]
-; CHECK-NEXT:    br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]]
+; CHECK-NEXT:    br i1 [[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]]
 ; CHECK:       .exit:
 ; CHECK-NEXT:    [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32
 ; CHECK-NEXT:    [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1
 ; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]]
-; CHECK-NEXT:    br i1 [[COND_FR]], label [[TMP1:%.*]], label [[DOTEXIT_THREAD]]
-; CHECK:       0:
-; CHECK-NEXT:    br label [[DOTEXIT_THREAD]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]]
 ; CHECK:       .exit.thread:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[ADD3]], [[DOTEXIT]] ], [ [[J]], [[TMP1]] ], [ [[J]], [[ENTRY:%.*]] ], [ [[J]], [[COND_FALSE_6_I]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ]
+; CHECK-NEXT:    br label [[DOTEXIT_THREAD5]]
+; CHECK:       .exit.thread5:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ]
 ; CHECK-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
@@ -560,10 +560,10 @@ define void @test_func(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[LOCAL_VAR_0:%.*]] = phi i32 [ [[TMP1]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    switch i32 [[LOCAL_VAR_0]], label [[SW_DEFAULT]] [
-; CHECK-NEXT:      i32 2, label [[SW_BB]]
-; CHECK-NEXT:      i32 4, label [[SW_BB7]]
-; CHECK-NEXT:      i32 5, label [[SW_BB8:%.*]]
-; CHECK-NEXT:      i32 7, label [[SW_BB9:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB]]
+; CHECK-NEXT:    i32 4, label [[SW_BB7]]
+; CHECK-NEXT:    i32 5, label [[SW_BB8:%.*]]
+; CHECK-NEXT:    i32 7, label [[SW_BB9:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb:
 ; CHECK-NEXT:    call void @foo()
@@ -674,5 +674,3 @@ if.end:
 ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1984}
 ; CHECK: [[PROF1]] = !{!"branch_weights", i64 1073741824, i64 3221225472}
 ;.
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-BPI: {{.*}}
diff --git a/llvm/test/Transforms/JumpThreading/thread-prob-7.ll b/llvm/test/Transforms/JumpThreading/thread-prob-7.ll
index 4623a579be48f..8c9d89871d00b 100644
--- a/llvm/test/Transforms/JumpThreading/thread-prob-7.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-prob-7.ll
@@ -14,15 +14,15 @@ define i32 @func0(i32 %a0, i32 %a1) !prof !0 {
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[BB_JOIN_THREAD:%.*]], label [[TEST2_FALSE:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       test2_false:
 ; CHECK-NEXT:    call void @foobar()
-; CHECK-NEXT:    br label [[BB_JOIN_THREAD]]
+; CHECK-NEXT:    br label [[TMP0:%.*]]
 ; CHECK:       bb_join:
 ; CHECK-NEXT:    [[C:%.*]] = phi i1 [ [[CX]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[COND_FR:%.*]] = freeze i1 [[C]]
-; CHECK-NEXT:    br i1 [[COND_FR]], label [[BB_JOIN_THREAD1:%.*]], label [[BB_JOIN_THREAD]], !prof [[PROF3:![0-9]+]]
+; CHECK-NEXT:    br i1 [[COND_FR]], label [[BB_JOIN_THREAD]], label [[TMP0]], !prof [[PROF3:![0-9]+]]
 ; CHECK:       bb_join.thread:
-; CHECK-NEXT:    br label [[BB_JOIN_THREAD]]
+; CHECK-NEXT:    br label [[TMP0]]
 ; CHECK:       0:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 7, [[BB_JOIN]] ], [ 7, [[TEST2_FALSE]] ], [ 42, [[TEST2]] ], [ 42, [[BB_JOIN_THREAD1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ 42, [[BB_JOIN_THREAD]] ], [ 7, [[BB_JOIN]] ], [ 7, [[TEST2_FALSE]] ]
 ; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
diff --git a/llvm/test/Transforms/JumpThreading/uncond-no-phi.ll b/llvm/test/Transforms/JumpThreading/uncond-no-phi.ll
deleted file mode 100644
index 6104e8f8778bc..0000000000000
--- a/llvm/test/Transforms/JumpThreading/uncond-no-phi.ll
+++ /dev/null
@@ -1,123 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -passes=jump-threading -S < %s | FileCheck %s
-
-define i1 @if_else(i1 %c, i1 %c1) {
-; CHECK-LABEL: define i1 @if_else(
-; CHECK-SAME: i1 [[C:%.*]], i1 [[C1:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[RETURN:%.*]]
-; CHECK:       then:
-; CHECK-NEXT:    call void @dummy()
-; CHECK-NEXT:    br i1 [[C1]], label [[ELSE:%.*]], label [[RETURN]]
-; CHECK:       else:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i1 [ false, [[THEN]] ], [ true, [[ENTRY:%.*]] ], [ true, [[ELSE]] ]
-; CHECK-NEXT:    ret i1 [[RETVAL_0]]
-;
-entry:
-  br i1 %c, label %then, label %else
-
-then:
-  call void @dummy()
-  br i1 %c1, label %else, label %return
-
-else:
-  br label %return
-
-return:
-  %retval.0 = phi i1 [ true, %else ], [ false, %then ]
-  ret i1 %retval.0
-}
-
-define i8 @switch_uncond(i8 %arg) {
-; CHECK-LABEL: define i8 @switch_uncond(
-; CHECK-SAME: i8 [[ARG:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i8 [[ARG]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:      i8 0, label [[BB1:%.*]]
-; CHECK-NEXT:      i8 1, label [[BB3:%.*]]
-; CHECK-NEXT:      i8 2, label [[BB2:%.*]]
-; CHECK-NEXT:      i8 3, label [[END:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       default:
-; CHECK-NEXT:    unreachable
-; CHECK:       bb:
-; CHECK-NEXT:    call void @dummy()
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       bb1:
-; CHECK-NEXT:    call void @dummy()
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       bb2:
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ 1, [[ENTRY:%.*]] ], [ 0, [[BB3]] ], [ 0, [[BB1]] ], [ 0, [[BB2]] ]
-; CHECK-NEXT:    ret i8 [[PHI]]
-;
-entry:
-  switch i8 %arg, label %default [
-  i8 0, label %bb
-  i8 1, label %bb1
-  i8 2, label %bb2
-  i8 3, label %end
-  ]
-
-default:
-  unreachable
-
-bb:
-  call void @dummy()
-  br label %bb2
-
-bb1:
-  call void @dummy()
-  br label %bb2
-
-; Predecessors of %bb2 are %bb and %bb1, they are not identical.
-; So we can thread %bb2.
-bb2:
-  br label %end
-
-end:
-  %phi = phi i8 [ 0, %bb2 ], [ 1, %entry ]
-  ret i8 %phi
-}
-
-define i8 @switch_uncond_fail(i8 %arg) {
-; CHECK-LABEL: define i8 @switch_uncond_fail(
-; CHECK-SAME: i8 [[ARG:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i8 [[ARG]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:      i8 0, label [[BB:%.*]]
-; CHECK-NEXT:      i8 1, label [[BB]]
-; CHECK-NEXT:      i8 2, label [[END:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       default:
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       bb:
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       end:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ 0, [[BB]] ], [ 1, [[ENTRY:%.*]] ], [ 2, [[DEFAULT]] ]
-; CHECK-NEXT:    ret i8 [[PHI]]
-;
-entry:
-  switch i8 %arg, label %default [
-  i8 0, label %bb
-  i8 1, label %bb
-  i8 2, label %end
-  ]
-
-default:
-  br label %end
-
-; Predecessor of %bb is only %entry (though there are two in predecessor list),
-; thus it's unthreadable.
-bb:
-  br label %end
-
-end:
-  %phi = phi i8 [ 0, %bb ], [ 1, %entry ], [ 2, %default ]
-  ret i8 %phi
-}
-
-declare void @dummy()
diff --git a/llvm/test/Transforms/PhaseOrdering/thread-uncond-bb.ll b/llvm/test/Transforms/PhaseOrdering/thread-uncond-bb.ll
deleted file mode 100644
index 17146d7d5987f..0000000000000
--- a/llvm/test/Transforms/PhaseOrdering/thread-uncond-bb.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -O3 -S | FileCheck %s
-
-define i32 @thread_uncond_bb_cmp(i1 %c, i32 %v) {
-; CHECK-LABEL: define i32 @thread_uncond_bb_cmp(
-; CHECK-SAME: i1 [[C:%.*]], i32 [[V:%.*]]) local_unnamed_addr {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[C]], label [[DO_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @dummy()
-; CHECK-NEXT:    br label [[DO_END]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V]], [[IF_THEN]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL]]
-;
-entry:
-  br i1 %c, label %do.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  call void @dummy()
-  %tobool = icmp eq i32 %v, 0
-  br i1 %tobool, label %do.end, label %return
-
-do.end:                                           ; preds = %entry, %if.then
-  br label %return
-
-return:                                           ; preds = %if.then, %do.end
-  %retval = phi i32 [ 0, %do.end ], [ %v, %if.then ]
-  ret i32 %retval
-}
-
-define i32 @thread_uncond_bb_cmp_zext(i1 %c, i32 %v) {
-; CHECK-LABEL: define i32 @thread_uncond_bb_cmp_zext(
-; CHECK-SAME: i1 [[C:%.*]], i32 [[V:%.*]]) local_unnamed_addr {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[C]], label [[DO_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @dummy()
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[V]], 0
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = zext i1 [[TOBOOL]] to i32
-; CHECK-NEXT:    br label [[DO_END]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL]]
-;
-entry:
-  br i1 %c, label %do.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  call void @dummy()
-  %tobool = icmp eq i32 %v, 0
-  br i1 %tobool, label %do.end, label %return
-
-do.end:                                           ; preds = %entry, %if.then
-  br label %return
-
-return:                                           ; preds = %if.then, %do.end
-  %retval = phi i32 [ 0, %do.end ], [ 1, %if.then ]
-  ret i32 %retval
-}
-
-declare void @dummy()

From 22629bb22a1bea95eebfc9b3171005de107c38f1 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 16 Apr 2024 10:54:28 -0400
Subject: [PATCH 05/58] [libc++] Use availability to rely on key functions for
 bad_expected_access and bad_function_call (#87390)

This patch uses our availability machinery to allow defining a key
function for bad_function_call and bad_expected_access at all times but
only rely on it when we can. This prevents compilers from complaining
about weak vtables and reduces code bloat and the amount of work done by
the dynamic linker.

rdar://111917845
---
 libcxx/include/__availability                 | 16 ++++++++++++
 libcxx/include/__config                       | 26 ++++---------------
 .../include/__expected/bad_expected_access.h  | 13 ++++++----
 libcxx/include/__functional/function.h        |  5 +++-
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  4 +++
 ...xxabi.v1.stable.noexceptions.nonew.abilist |  4 +++
 libcxx/src/CMakeLists.txt                     |  1 +
 libcxx/src/expected.cpp                       | 13 ++++++++++
 libcxx/src/functional.cpp                     |  2 --
 16 files changed, 83 insertions(+), 29 deletions(-)
 create mode 100644 libcxx/src/expected.cpp

diff --git a/libcxx/include/__availability b/libcxx/include/__availability
index bb3ed0a8da521..aa761eb5bfe5e 100644
--- a/libcxx/include/__availability
+++ b/libcxx/include/__availability
@@ -160,6 +160,15 @@
 #  define _LIBCPP_AVAILABILITY_HAS_TZDB 1
 #  define _LIBCPP_AVAILABILITY_TZDB
 
+// These macros determine whether we assume that std::bad_function_call and
+// std::bad_expected_access provide a key function in the dylib. This allows
+// centralizing their vtable and typeinfo instead of having all TUs provide
+// a weak definition that then gets deduplicated.
+#  define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 1
+#  define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION
+#  define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 1
+#  define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION
+
 #elif defined(__APPLE__)
 
 #  define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS                                                                 \
@@ -290,6 +299,13 @@
 #  else
 #    define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 1
 #  endif
+
+#  define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 0
+#  define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION __attribute__((unavailable))
+
+#  define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 0
+#  define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION __attribute__((unavailable))
+
 #else
 
 // ...New vendors can add availability markup here...
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 82782b31c557b..e9fda9cd24ebb 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -120,14 +120,11 @@
 #    define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB
 #    define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB
 #    define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE
-// Define a key function for `bad_function_call` in the library, to centralize
-// its vtable and typeinfo to libc++ rather than having all other libraries
-// using that class define their own copies.
-#    define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
-// Override the default return value of exception::what() for
-// bad_function_call::what() with a string that is specific to
-// bad_function_call (see http://wg21.link/LWG2233). This is an ABI break
-// because it changes the vtable layout of bad_function_call.
+// Override the default return value of exception::what() for bad_function_call::what()
+// with a string that is specific to bad_function_call (see http://wg21.link/LWG2233).
+// This is an ABI break on platforms that sign and authenticate vtable function pointers
+// because it changes the mangling of the virtual function located in the vtable, which
+// changes how it gets signed.
 #    define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
 // Enable optimized version of __do_get_(un)signed which avoids redundant copies.
 #    define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
@@ -197,19 +194,6 @@
 #    if defined(__FreeBSD__) && __FreeBSD__ < 14
 #      define _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR
 #    endif
-// For XCOFF linkers, we have problems if we see a weak hidden version of a symbol
-// in user code (like you get with -fvisibility-inlines-hidden) and then a strong def
-// in the library, so we need to always rely on the library version.
-#    if defined(_AIX)
-#      define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
-#    endif
-#  endif
-
-#  if defined(_LIBCPP_BUILDING_LIBRARY) || _LIBCPP_ABI_VERSION >= 2
-// Define a key function for `bad_function_call` in the library, to centralize
-// its vtable and typeinfo to libc++ rather than having all other libraries
-// using that class define their own copies.
-#    define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
 #  endif
 
 // We had some bugs where we use [[no_unique_address]] together with construct_at,
diff --git a/libcxx/include/__expected/bad_expected_access.h b/libcxx/include/__expected/bad_expected_access.h
index 9d490307b6808..ef29fa5088313 100644
--- a/libcxx/include/__expected/bad_expected_access.h
+++ b/libcxx/include/__expected/bad_expected_access.h
@@ -9,6 +9,7 @@
 #ifndef _LIBCPP___EXPECTED_BAD_EXPECTED_ACCESS_H
 #define _LIBCPP___EXPECTED_BAD_EXPECTED_ACCESS_H
 
+#include <__availability>
 #include <__config>
 #include <__exception/exception.h>
 #include <__utility/move.h>
@@ -28,9 +29,11 @@ template <class _Err>
 class bad_expected_access;
 
 _LIBCPP_DIAGNOSTIC_PUSH
+#  if !_LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION
 _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables")
+#  endif
 template <>
-class bad_expected_access<void> : public exception {
+class _LIBCPP_EXPORTED_FROM_ABI bad_expected_access<void> : public exception {
 protected:
   _LIBCPP_HIDE_FROM_ABI bad_expected_access() noexcept                                      = default;
   _LIBCPP_HIDE_FROM_ABI bad_expected_access(const bad_expected_access&) noexcept            = default;
@@ -40,11 +43,11 @@ class bad_expected_access<void> : public exception {
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~bad_expected_access() override                             = default;
 
 public:
-  // The way this has been designed (by using a class template below) means that we'll already
-  // have a profusion of these vtables in TUs, and the dynamic linker will already have a bunch
-  // of work to do. So it is not worth hiding the <void> specialization in the dylib, given that
-  // it adds deployment target restrictions.
+#  if _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION
+  const char* what() const noexcept override;
+#  else
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override { return "bad access to std::expected"; }
+#  endif
 };
 _LIBCPP_DIAGNOSTIC_POP
 
diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h
index 1faa9e92ebd63..36057706933d4 100644
--- a/libcxx/include/__functional/function.h
+++ b/libcxx/include/__functional/function.h
@@ -11,6 +11,7 @@
 #define _LIBCPP___FUNCTIONAL_FUNCTION_H
 
 #include <__assert>
+#include <__availability>
 #include <__config>
 #include <__exception/exception.h>
 #include <__functional/binary_function.h>
@@ -55,7 +56,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // bad_function_call
 
 _LIBCPP_DIAGNOSTIC_PUSH
+#  if !_LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION
 _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables")
+#  endif
 class _LIBCPP_EXPORTED_FROM_ABI bad_function_call : public exception {
 public:
   _LIBCPP_HIDE_FROM_ABI bad_function_call() _NOEXCEPT                                    = default;
@@ -64,7 +67,7 @@ class _LIBCPP_EXPORTED_FROM_ABI bad_function_call : public exception {
 // Note that when a key function is not used, every translation unit that uses
 // bad_function_call will end up containing a weak definition of the vtable and
 // typeinfo.
-#  ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
+#  if _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION
   ~bad_function_call() _NOEXCEPT override;
 #  else
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~bad_function_call() _NOEXCEPT override {}
diff --git a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
index 46353986f5d7d..64cf368e6e684 100644
--- a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -575,6 +575,7 @@
 {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'}
+{'is_defined': True, 'name': '__ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'}
@@ -2073,6 +2074,7 @@
 {'is_defined': True, 'name': '__ZTINSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '__ZTINSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'}
@@ -2264,6 +2266,7 @@
 {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb0EEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '__ZTSNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__13pmr15memory_resourceE', 'size': 0, 'type': 'OBJECT'}
@@ -2482,6 +2485,7 @@
 {'is_defined': True, 'name': '__ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '__ZTVNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
index fec3a4505a0c6..8751dffe23025 100644
--- a/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/i686-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -209,6 +209,7 @@
 {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNKSt6__ndk119bad_expected_accessIvE4whatEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE10do_unshiftER9mbstate_tPcS4_RS4_', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'}
@@ -1722,6 +1723,7 @@
 {'is_defined': True, 'name': '_ZTINSt6__ndk118__time_get_storageIwEE', 'size': 12, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 12, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk119__shared_weak_countE', 'size': 24, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTINSt6__ndk119bad_expected_accessIvEE', 'size': 12, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 12, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 12, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 12, 'type': 'OBJECT'}
@@ -1958,6 +1960,7 @@
 {'is_defined': True, 'name': '_ZTSNSt6__ndk118__time_get_storageIwEE', 'size': 35, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 72, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk119__shared_weak_countE', 'size': 33, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTSNSt6__ndk119bad_expected_accessIvEE', 'size': 36, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 38, 'type': 'OBJECT'}
@@ -2188,6 +2191,7 @@
 {'is_defined': True, 'name': '_ZTVNSt6__ndk117moneypunct_bynameIwLb1EEE', 'size': 56, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 60, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk119__shared_weak_countE', 'size': 28, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTVNSt6__ndk119bad_expected_accessIvEE', 'size': 20, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 40, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 40, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 48, 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
index e52cf98dd4c4f..7e223e6652884 100644
--- a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -99,6 +99,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftERPcS2_S2_S3_', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
@@ -910,6 +911,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
@@ -969,6 +971,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117bad_function_callE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
@@ -1031,6 +1034,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
index 52a04706ddf20..407d0456757af 100644
--- a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -99,6 +99,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftERPcS2_S2_S3_', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'}
@@ -910,6 +911,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
@@ -969,6 +971,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__117bad_function_callE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RO', 'type': 'OBJECT'}
@@ -1031,6 +1034,7 @@
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117__widen_from_utf8ILm32EEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__117bad_function_callE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
+{'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDsEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
 {'import_export': 'EXP', 'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IwEE', 'storage_mapping_class': 'RW', 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
index c169b4a992521..d578b41383c0e 100644
--- a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -575,6 +575,7 @@
 {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'}
+{'is_defined': True, 'name': '__ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '__ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'}
@@ -2087,6 +2088,7 @@
 {'is_defined': True, 'name': '__ZTINSt3__118__time_get_storageIwEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '__ZTINSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'}
@@ -2291,6 +2293,7 @@
 {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb0EEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '__ZTSNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTSNSt3__13pmr15memory_resourceE', 'size': 0, 'type': 'OBJECT'}
@@ -2516,6 +2519,7 @@
 {'is_defined': True, 'name': '__ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__119__shared_weak_countE', 'size': 0, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '__ZTVNSt3__119bad_expected_accessIvEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 0, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '__ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 0, 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
index efa2189e9c928..fc0f4fcf415e6 100644
--- a/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-linux-android21.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -209,6 +209,7 @@
 {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNKSt6__ndk119bad_expected_accessIvE4whatEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE10do_unshiftER9mbstate_tPcS4_RS4_', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt6__ndk120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'}
@@ -1722,6 +1723,7 @@
 {'is_defined': True, 'name': '_ZTINSt6__ndk118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk119__shared_weak_countE', 'size': 40, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTINSt6__ndk119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'}
@@ -1955,6 +1957,7 @@
 {'is_defined': True, 'name': '_ZTSNSt6__ndk118__time_get_storageIwEE', 'size': 35, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 72, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk119__shared_weak_countE', 'size': 33, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTSNSt6__ndk119bad_expected_accessIvEE', 'size': 36, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 73, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 38, 'type': 'OBJECT'}
@@ -2182,6 +2185,7 @@
 {'is_defined': True, 'name': '_ZTVNSt6__ndk117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk119__shared_weak_countE', 'size': 56, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTVNSt6__ndk119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt6__ndk120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist
index ebda5b0dfba57..4022339562b3a 100644
--- a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -266,6 +266,7 @@
 {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'}
@@ -1695,6 +1696,7 @@
 {'is_defined': True, 'name': '_ZTINSt3__118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'size': 40, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'}
@@ -1829,6 +1831,7 @@
 {'is_defined': True, 'name': '_ZTSNSt3__118__time_get_storageIwEE', 'size': 32, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 69, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'size': 30, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'size': 33, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 35, 'type': 'OBJECT'}
@@ -1962,6 +1965,7 @@
 {'is_defined': True, 'name': '_ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'size': 56, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist
index 6432ad3be3585..574c4504c59b8 100644
--- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -264,6 +264,7 @@
 {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'}
@@ -1696,6 +1697,7 @@
 {'is_defined': True, 'name': '_ZTINSt3__118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'size': 40, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'}
@@ -1830,6 +1832,7 @@
 {'is_defined': True, 'name': '_ZTSNSt3__118__time_get_storageIwEE', 'size': 32, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 69, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'size': 30, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'size': 33, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 35, 'type': 'OBJECT'}
@@ -1963,6 +1966,7 @@
 {'is_defined': True, 'name': '_ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'size': 56, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'}
diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist
index 1fe84e17b3f7f..665546699e8de 100644
--- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist
@@ -235,6 +235,7 @@
 {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIcE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__118__time_get_storageIwE15__do_date_orderEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__119__shared_weak_count13__get_deleterERKSt9type_info', 'type': 'FUNC'}
+{'is_defined': True, 'name': '_ZNKSt3__119bad_expected_accessIvE4whatEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE10do_unshiftER11__mbstate_tPcS4_RS4_', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE11do_encodingEv', 'type': 'FUNC'}
 {'is_defined': True, 'name': '_ZNKSt3__120__codecvt_utf8_utf16IDiE13do_max_lengthEv', 'type': 'FUNC'}
@@ -1667,6 +1668,7 @@
 {'is_defined': True, 'name': '_ZTINSt3__118__time_get_storageIwEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119__shared_weak_countE', 'size': 40, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTINSt3__119bad_expected_accessIvEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 24, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTINSt3__120__codecvt_utf8_utf16IDiEE', 'size': 24, 'type': 'OBJECT'}
@@ -1801,6 +1803,7 @@
 {'is_defined': True, 'name': '_ZTSNSt3__118__time_get_storageIwEE', 'size': 32, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 69, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119__shared_weak_countE', 'size': 30, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTSNSt3__119bad_expected_accessIvEE', 'size': 33, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 70, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTSNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 35, 'type': 'OBJECT'}
@@ -1934,6 +1937,7 @@
 {'is_defined': True, 'name': '_ZTVNSt3__117moneypunct_bynameIwLb1EEE', 'size': 112, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__118basic_stringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 120, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119__shared_weak_countE', 'size': 56, 'type': 'OBJECT'}
+{'is_defined': True, 'name': '_ZTVNSt3__119bad_expected_accessIvEE', 'size': 40, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119basic_istringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__119basic_ostringstreamIcNS_11char_traitsIcEENS_9allocatorIcEEEE', 'size': 80, 'type': 'OBJECT'}
 {'is_defined': True, 'name': '_ZTVNSt3__120__codecvt_utf8_utf16IDiEE', 'size': 96, 'type': 'OBJECT'}
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 208500ec14fcd..a4a3fee864571 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -10,6 +10,7 @@ set(LIBCXX_SOURCES
   chrono.cpp
   error_category.cpp
   exception.cpp
+  expected.cpp
   filesystem/filesystem_clock.cpp
   filesystem/filesystem_error.cpp
   filesystem/path_parser.h
diff --git a/libcxx/src/expected.cpp b/libcxx/src/expected.cpp
new file mode 100644
index 0000000000000..f30efb5164796
--- /dev/null
+++ b/libcxx/src/expected.cpp
@@ -0,0 +1,13 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <expected>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+const char* bad_expected_access<void>::what() const noexcept { return "bad access to std::expected"; }
+_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/src/functional.cpp b/libcxx/src/functional.cpp
index 570bb78e150b7..ef53e3e84da0e 100644
--- a/libcxx/src/functional.cpp
+++ b/libcxx/src/functional.cpp
@@ -10,9 +10,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
 bad_function_call::~bad_function_call() noexcept {}
-#endif
 
 #ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
 const char* bad_function_call::what() const noexcept { return "std::bad_function_call"; }

From 9ddedf07ed80076e0e419940753aeaaf719a09ec Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 16 Apr 2024 10:57:48 -0400
Subject: [PATCH 06/58] [libc++] Deprecate the C++20 synchronization library
 before C++20 (#86410)

When we initially implemented the C++20 synchronization library, we
reluctantly accepted for the implementation to be backported to C++03
upon request from the person who provided the patch. This was when we
were only starting to have experience with the issues this can create,
so we flinched. Nowadays, we have a much stricter stance about not
backporting features to previous standards.

We have recently started fixing several bugs (and near bugs) in our
implementation of the synchronization library. A recurring theme during
these reviews has been how difficult the current code is to understand,
and upon inspection it becomes clear that being able to use a few recent
C++ features (in particular lambdas) would help a great deal. The code
would still be pretty intricate, but it would be a lot easier to reason
about the flow of callbacks through things like
__thread_poll_with_backoff.

As a result, this patch deprecates support for the synchronization
library before C++20. In the next release, we can remove that support
entirely.
---
 libcxx/.clang-format                          |  1 +
 libcxx/docs/ReleaseNotes/19.rst               |  4 +++
 libcxx/include/__atomic/atomic.h              | 12 ++++---
 libcxx/include/__atomic/atomic_flag.h         | 34 +++++++++++--------
 libcxx/include/__config                       |  8 +++++
 libcxx/include/barrier                        |  2 +-
 libcxx/include/latch                          |  2 +-
 libcxx/include/semaphore                      |  6 ++--
 .../atomic_notify_all.pass.cpp                |  5 ++-
 .../atomic_notify_one.pass.cpp                |  5 ++-
 .../atomic_wait.pass.cpp                      |  5 ++-
 .../atomic_wait_explicit.pass.cpp             |  5 ++-
 .../std/thread/thread.barrier/arrive.pass.cpp |  3 ++
 .../thread.barrier/arrive_and_drop.pass.cpp   |  3 ++
 .../thread.barrier/arrive_and_wait.pass.cpp   |  3 ++
 .../thread/thread.barrier/completion.pass.cpp |  3 ++
 .../thread.barrier/ctor.compile.pass.cpp      |  3 ++
 .../std/thread/thread.barrier/max.pass.cpp    |  3 ++
 .../thread.latch/arrive_and_wait.pass.cpp     |  3 ++
 .../thread/thread.latch/count_down.pass.cpp   |  3 ++
 .../std/thread/thread.latch/ctor.pass.cpp     |  3 ++
 .../test/std/thread/thread.latch/max.pass.cpp |  3 ++
 .../std/thread/thread.latch/try_wait.pass.cpp |  3 ++
 .../thread/thread.semaphore/acquire.pass.cpp  |  3 ++
 .../thread/thread.semaphore/binary.pass.cpp   |  3 ++
 .../thread.semaphore/ctor.compile.pass.cpp    |  3 ++
 .../std/thread/thread.semaphore/max.pass.cpp  |  3 ++
 .../thread/thread.semaphore/release.pass.cpp  |  3 ++
 .../thread/thread.semaphore/timed.pass.cpp    |  3 ++
 .../thread.semaphore/try_acquire.pass.cpp     |  3 ++
 30 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/libcxx/.clang-format b/libcxx/.clang-format
index 39ae1322ffa8a..c37ab817bca90 100644
--- a/libcxx/.clang-format
+++ b/libcxx/.clang-format
@@ -24,6 +24,7 @@ AttributeMacros: [
                   '_LIBCPP_CONSTEXPR_SINCE_CXX23',
                   '_LIBCPP_CONSTEXPR',
                   '_LIBCPP_CONSTINIT',
+                  '_LIBCPP_DEPRECATED_ATOMIC_SYNC',
                   '_LIBCPP_DEPRECATED_IN_CXX11',
                   '_LIBCPP_DEPRECATED_IN_CXX14',
                   '_LIBCPP_DEPRECATED_IN_CXX17',
diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index e5db17daa4823..45aac88e45502 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -75,6 +75,10 @@ Improvements and New Features
 Deprecations and Removals
 -------------------------
 
+- The C++20 synchronization library (``<barrier>``, ``<latch>``, ``atomic::wait``, etc.) has been deprecated
+  in language modes prior to C++20. If you are using these features prior to C++20, please update to ``-std=c++20``.
+  In LLVM 20, the C++20 synchronization library will be removed entirely in language modes prior to C++20.
+
 - TODO: The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable that was used to enable the safe mode has been deprecated and setting
   it triggers an error; use the ``LIBCXX_HARDENING_MODE`` CMake variable with the value ``extensive`` instead. Similarly,
   the ``_LIBCPP_ENABLE_ASSERTIONS`` macro has been deprecated (setting it to ``1`` still enables the extensive mode in
diff --git a/libcxx/include/__atomic/atomic.h b/libcxx/include/__atomic/atomic.h
index 3dfb6937d0325..bd3f659c22df0 100644
--- a/libcxx/include/__atomic/atomic.h
+++ b/libcxx/include/__atomic/atomic.h
@@ -462,22 +462,26 @@ atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __
 // atomic_notify_one
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_one();
 }
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_one();
 }
 
 // atomic_notify_all
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_all();
 }
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_all();
 }
 
diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h
index 084366237c16e..3ec3366ecaaf9 100644
--- a/libcxx/include/__atomic/atomic_flag.h
+++ b/libcxx/include/__atomic/atomic_flag.h
@@ -49,22 +49,26 @@ struct atomic_flag {
     __cxx_atomic_store(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(false), __m);
   }
 
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const
-      volatile _NOEXCEPT {
+  _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+  wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT {
     std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+  _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
   wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT {
     std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT {
+  _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT {
+    std::__atomic_notify_one(*this);
+  }
+  _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT {
     std::__atomic_notify_one(*this);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); }
   _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT {
     std::__atomic_notify_all(*this);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); }
+  _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT {
+    std::__atomic_notify_all(*this);
+  }
 
 #if _LIBCPP_STD_VER >= 20
   _LIBCPP_HIDE_FROM_ABI constexpr atomic_flag() _NOEXCEPT : __a_(false) {}
@@ -141,41 +145,43 @@ inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_clear_explicit(atomic_flag* __o, m
   __o->clear(__m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
 atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT {
   __o->wait(__v);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
 atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT {
   __o->wait(__v);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
 atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT {
   __o->wait(__v, __m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
 atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT {
   __o->wait(__v, __m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
 atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT {
   __o->notify_one();
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT {
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT {
   __o->notify_one();
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
 atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT {
   __o->notify_all();
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT {
+inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT {
   __o->notify_all();
 }
 
diff --git a/libcxx/include/__config b/libcxx/include/__config
index e9fda9cd24ebb..9b4155af1e3c6 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -956,6 +956,14 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DEPRECATED_(m)
 #  endif
 
+#  if _LIBCPP_STD_VER < 20
+#    define _LIBCPP_DEPRECATED_ATOMIC_SYNC                                                                             \
+      _LIBCPP_DEPRECATED_("The C++20 synchronization library has been deprecated prior to C++20. Please update to "    \
+                          "using -std=c++20 if you need to use these facilities.")
+#  else
+#    define _LIBCPP_DEPRECATED_ATOMIC_SYNC /* nothing */
+#  endif
+
 #  if !defined(_LIBCPP_CXX03_LANG)
 #    define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
 #  else
diff --git a/libcxx/include/barrier b/libcxx/include/barrier
index c5fd84b91925b..d776078267625 100644
--- a/libcxx/include/barrier
+++ b/libcxx/include/barrier
@@ -257,7 +257,7 @@ public:
 #  endif // !_LIBCPP_HAS_NO_TREE_BARRIER
 
 template <class _CompletionF = __empty_completion>
-class barrier {
+class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier {
   __barrier_base<_CompletionF> __b_;
 
 public:
diff --git a/libcxx/include/latch b/libcxx/include/latch
index 3cc7258381143..1937617f7dcc6 100644
--- a/libcxx/include/latch
+++ b/libcxx/include/latch
@@ -66,7 +66,7 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-class latch {
+class _LIBCPP_DEPRECATED_ATOMIC_SYNC latch {
   __atomic_base<ptrdiff_t> __a_;
 
 public:
diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore
index 1375ec3f7c04b..cb2f42c106ca8 100644
--- a/libcxx/include/semaphore
+++ b/libcxx/include/semaphore
@@ -127,7 +127,7 @@ private:
 };
 
 template <ptrdiff_t __least_max_value = _LIBCPP_SEMAPHORE_MAX>
-class counting_semaphore {
+class _LIBCPP_DEPRECATED_ATOMIC_SYNC counting_semaphore {
   __atomic_semaphore_base __semaphore_;
 
 public:
@@ -172,7 +172,9 @@ public:
   }
 };
 
-using binary_semaphore = counting_semaphore<1>;
+_LIBCPP_SUPPRESS_DEPRECATED_PUSH
+using binary_semaphore _LIBCPP_DEPRECATED_ATOMIC_SYNC = counting_semaphore<1>;
+_LIBCPP_SUPPRESS_DEPRECATED_POP
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp
index 2b9f34b731f87..0ec530c922e70 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_all.pass.cpp
@@ -7,9 +7,12 @@
 //===----------------------------------------------------------------------===//
 //
 // UNSUPPORTED: no-threads
-// XFAIL: c++03
+// UNSUPPORTED: c++03
 // XFAIL: !has-1024-bit-atomics
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <atomic>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp
index dfa781c566009..c21b67d479ae2 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_notify_one.pass.cpp
@@ -7,9 +7,12 @@
 //===----------------------------------------------------------------------===//
 //
 // UNSUPPORTED: no-threads
-// XFAIL: c++03
+// UNSUPPORTED: c++03
 // XFAIL: !has-1024-bit-atomics
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <atomic>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp
index 38142b336e72c..af99113f13499 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp
@@ -7,9 +7,12 @@
 //===----------------------------------------------------------------------===//
 //
 // UNSUPPORTED: no-threads
-// XFAIL: c++03
+// UNSUPPORTED: c++03
 // XFAIL: !has-1024-bit-atomics
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <atomic>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp
index 2db95a0b67a7f..bb8c64593b54b 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait_explicit.pass.cpp
@@ -7,9 +7,12 @@
 //===----------------------------------------------------------------------===//
 //
 // UNSUPPORTED: no-threads
-// XFAIL: c++03
+// UNSUPPORTED: c++03
 // XFAIL: !has-1024-bit-atomics
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <atomic>
diff --git a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp
index 18cdc6d654ac2..d9d9c1dba6bbb 100644
--- a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp
+++ b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <barrier>
diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp
index 3fc48261de1b1..aff7b26e16f70 100644
--- a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp
+++ b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <barrier>
diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp
index 2aee8624ae3d5..8c45ba9278f28 100644
--- a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp
+++ b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <barrier>
diff --git a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp
index 7354dbe6ffe8a..633a0c8bf2366 100644
--- a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp
+++ b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <barrier>
diff --git a/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp
index d47127a18613b..fe7068d2a574c 100644
--- a/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp
+++ b/libcxx/test/std/thread/thread.barrier/ctor.compile.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // <barrier>
 
 // explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF());
diff --git a/libcxx/test/std/thread/thread.barrier/max.pass.cpp b/libcxx/test/std/thread/thread.barrier/max.pass.cpp
index ec03c5c87a09c..b09a02e1bdef4 100644
--- a/libcxx/test/std/thread/thread.barrier/max.pass.cpp
+++ b/libcxx/test/std/thread/thread.barrier/max.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // <barrier>
 
 #include <barrier>
diff --git a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp
index ddc06d2038cc8..8ca4f37b73b95 100644
--- a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp
+++ b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <latch>
diff --git a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp
index 1503c09509a6c..eb524abd24b98 100644
--- a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp
+++ b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <latch>
diff --git a/libcxx/test/std/thread/thread.latch/ctor.pass.cpp b/libcxx/test/std/thread/thread.latch/ctor.pass.cpp
index 1983f6409cb5a..bca4561bd2f74 100644
--- a/libcxx/test/std/thread/thread.latch/ctor.pass.cpp
+++ b/libcxx/test/std/thread/thread.latch/ctor.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // <latch>
 
 // inline constexpr explicit latch(ptrdiff_t __expected);
diff --git a/libcxx/test/std/thread/thread.latch/max.pass.cpp b/libcxx/test/std/thread/thread.latch/max.pass.cpp
index 8b9176c8cac57..bcf353ed9712e 100644
--- a/libcxx/test/std/thread/thread.latch/max.pass.cpp
+++ b/libcxx/test/std/thread/thread.latch/max.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // <latch>
 
 #include <latch>
diff --git a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp
index 70ef2cdf71254..8f354463a8697 100644
--- a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp
+++ b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <latch>
diff --git a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp
index 3f6e3107e8bce..22eed736c6b75 100644
--- a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp
+++ b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <semaphore>
diff --git a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp
index 111a650b5ea39..c01c78506587c 100644
--- a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp
+++ b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <semaphore>
diff --git a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp
index 28ccc0124d489..dcc298ce11ce8 100644
--- a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp
+++ b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // <semaphore>
 
 // constexpr explicit counting_semaphore(ptrdiff_t desired);
diff --git a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp
index ca7ad0c92e60e..6f3ed5e345e0b 100644
--- a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp
+++ b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // <semaphore>
 
 #include <semaphore>
diff --git a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp
index bf3dd7f7d814f..3c4d179e50433 100644
--- a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp
+++ b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <semaphore>
diff --git a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp
index 9fa01fc035904..77f15ece221d4 100644
--- a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp
+++ b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <semaphore>
diff --git a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp
index 0d0f7792592fb..ec159daf87a3f 100644
--- a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp
+++ b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp
@@ -9,6 +9,9 @@
 // UNSUPPORTED: no-threads
 // UNSUPPORTED: c++03, c++11
 
+// Until we drop support for the synchronization library in C++11/14/17
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
 // XFAIL: availability-synchronization_library-missing
 
 // <semaphore>

From bd28889732e14ac6baca686c3ec99a82fc9cd89d Mon Sep 17 00:00:00 2001
From: Philip Reames <preames@rivosinc.com>
Date: Tue, 16 Apr 2024 07:54:51 -0700
Subject: [PATCH 07/58] [RISCV] Add coverage for strength reduction of mul 2^N
 +/- 3/5/9

---
 llvm/test/CodeGen/RISCV/rv64zba.ll | 60 ++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 0d1d4838c6113..a84b9e5e7962f 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -567,6 +567,66 @@ define i64 @mul96(i64 %a) {
   ret i64 %c
 }
 
+define i64 @mul119(i64 %a) {
+; CHECK-LABEL: mul119:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 119
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 119
+  ret i64 %c
+}
+
+define i64 @mul123(i64 %a) {
+; CHECK-LABEL: mul123:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 123
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 123
+  ret i64 %c
+}
+
+define i64 @mul125(i64 %a) {
+; CHECK-LABEL: mul125:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 125
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 125
+  ret i64 %c
+}
+
+define i64 @mul131(i64 %a) {
+; CHECK-LABEL: mul131:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 131
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 131
+  ret i64 %c
+}
+
+define i64 @mul133(i64 %a) {
+; CHECK-LABEL: mul133:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 133
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 133
+  ret i64 %c
+}
+
+define i64 @mul137(i64 %a) {
+; CHECK-LABEL: mul137:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 137
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    ret
+  %c = mul i64 %a, 137
+  ret i64 %c
+}
+
 define i64 @mul160(i64 %a) {
 ; RV64I-LABEL: mul160:
 ; RV64I:       # %bb.0:

From 1334c034a73b7bf8a7af08be1c33d24a58127c47 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek@amd.com>
Date: Tue, 16 Apr 2024 10:01:34 -0500
Subject: [PATCH 08/58] [flang] Fix test after
 4078afc6d23e25df6baedad61b224ef86a94d42f

This test requires the OpenMP runtime to be present, but the way that
the lit config detects it fails when "openmp" is added to RUNTIMES
instead of PROJECTS. This caused the tests to be skipped as unsupported
in local and upstream tests.

The actual bugs were a missing word in the message and the check being
placed on the wrong line.
---
 flang/test/Semantics/OpenMP/clause-validity01.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90
index 74f154bb0ad67..21b99cb82549a 100644
--- a/flang/test/Semantics/OpenMP/clause-validity01.f90
+++ b/flang/test/Semantics/OpenMP/clause-validity01.f90
@@ -342,8 +342,8 @@
   a = 1.0
   !ERROR: COPYPRIVATE clause is not allowed on the END WORKSHARE directive
   !$omp end workshare nowait copyprivate(a)
+  !ERROR: NOWAIT clause is not allowed on the OMP WORKSHARE directive, use it on OMP END WORKSHARE directive 
   !$omp workshare nowait
-  !ERROR: NOWAIT clause is not allowed on the WORKSHARE directive, use it on OMP END WORKSHARE directive
   !$omp end workshare
   !$omp end parallel
 

From bf1ad1d267b1f911cb9846403d2c3d3250a40870 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 16 Apr 2024 16:01:57 +0100
Subject: [PATCH 09/58] [VectorCombine][X86] Add initial shuffle-of-shuffles.ll
 test cover for #88743

---
 .../VectorCombine/X86/shuffle-of-shuffles.ll  | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll

diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
new file mode 100644
index 0000000000000..b5b5bb997c6c7
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+
+; TODO: fold to identity
+
+define <8 x i32> @concat_extract_subvectors(<8 x i32> %x) {
+; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors(
+; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[CONCAT]]
+;
+  %lo = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %hi = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %concat
+}
+
+; negative test - shuffle contains undef
+
+define <8 x i32> @concat_extract_subvectors_undef(<8 x i32> %x) {
+; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors_undef(
+; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 8>
+; CHECK-NEXT:    [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 8>
+; CHECK-NEXT:    [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[CONCAT]]
+;
+  %lo = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 8>
+  %hi = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 8>
+  %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %concat
+}
+
+; negative test - shuffle contains poison
+
+define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
+; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors_poison(
+; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 8>
+; CHECK-NEXT:    [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 8>
+; CHECK-NEXT:    [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[CONCAT]]
+;
+  %lo = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 8>
+  %hi = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 8>
+  %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %concat
+}

From f8e2ec13a8c6d33cb7b4f37869b4429ddcf43f01 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 16 Apr 2024 08:29:09 -0700
Subject: [PATCH 10/58] Revert "Add asan tests for libsanitizers. (#88349)"

This reverts commit 82f479ba315a417b6cd01a8c2efdc15c26689f2e due to bot breakage.
---
 lldb/test/API/functionalities/asan/Makefile   |  6 +-
 .../functionalities/asan/TestMemoryHistory.py | 73 +------------------
 .../functionalities/asan/TestReportData.py    | 20 +----
 .../API/functionalities/libsanitizers/util.py |  3 -
 4 files changed, 5 insertions(+), 97 deletions(-)
 delete mode 100644 lldb/test/API/functionalities/libsanitizers/util.py

diff --git a/lldb/test/API/functionalities/asan/Makefile b/lldb/test/API/functionalities/asan/Makefile
index d66696fed7078..4913a18d8cc6f 100644
--- a/lldb/test/API/functionalities/asan/Makefile
+++ b/lldb/test/API/functionalities/asan/Makefile
@@ -1,8 +1,4 @@
 C_SOURCES := main.c
-asan: CFLAGS_EXTRAS := -fsanitize=address -g -gcolumn-info
-asan: all
-
-libsanitizers: CFLAGS_EXTRAS := -fsanitize=address -fsanitize-stable-abi -g -gcolumn-info
-libsanitizers: all
+CFLAGS_EXTRAS := -fsanitize=address -g -gcolumn-info
 
 include Makefile.rules
diff --git a/lldb/test/API/functionalities/asan/TestMemoryHistory.py b/lldb/test/API/functionalities/asan/TestMemoryHistory.py
index ee7939203ead1..00162ae8822c7 100644
--- a/lldb/test/API/functionalities/asan/TestMemoryHistory.py
+++ b/lldb/test/API/functionalities/asan/TestMemoryHistory.py
@@ -9,21 +9,15 @@
 from lldbsuite.test import lldbplatform
 from lldbsuite.test import lldbutil
 
-from functionalities.libsanitizers.util import no_libsanitizers
 
 class AsanTestCase(TestBase):
     @skipIfFreeBSD  # llvm.org/pr21136 runtimes not yet available by default
     @expectedFailureNetBSD
     @skipUnlessAddressSanitizer
     def test(self):
-        self.build(make_targets=["asan"])
+        self.build()
         self.asan_tests()
 
-    @skipIf(oslist=no_match(["macosx"]))
-    def test_libsanitizers_asan(self):
-        self.build(make_targets=["libsanitizers"])
-        self.libsanitizer_tests()
-
     def setUp(self):
         # Call super's setUp().
         TestBase.setUp(self)
@@ -32,71 +26,6 @@ def setUp(self):
         self.line_free = line_number("main.c", "// free line")
         self.line_breakpoint = line_number("main.c", "// break line")
 
-    # Test line numbers: rdar://126237493
-    def libsanitizer_tests(self):
-        target = self.createTestTarget()
-
-        if no_libsanitizers(self):
-            self.skipTest("libsanitizers not found")
-
-        self.runCmd(
-            "env SanitizersAddress=1 MallocSanitizerZone=1 MallocSecureAllocator=0"
-        )
-
-        self.runCmd("run")
-
-        # In libsanitizers, memory history is not supported until a report has been generated
-        self.expect(
-            "thread list",
-            "Process should be stopped due to ASan report",
-            substrs=["stopped", "stop reason = Use of deallocated memory"],
-        )
-
-        # test the 'memory history' command
-        self.expect(
-            "memory history 'pointer'",
-            substrs=[
-                "Memory deallocated by Thread",
-                "a.out`f2",
-                "main.c",
-                "Memory allocated by Thread",
-                "a.out`f1",
-                "main.c",
-            ],
-        )
-
-        # do the same using SB API
-        process = self.dbg.GetSelectedTarget().process
-        val = (
-            process.GetSelectedThread().GetSelectedFrame().EvaluateExpression("pointer")
-        )
-        addr = val.GetValueAsUnsigned()
-        threads = process.GetHistoryThreads(addr)
-        self.assertEqual(threads.GetSize(), 2)
-
-        history_thread = threads.GetThreadAtIndex(0)
-        self.assertTrue(history_thread.num_frames >= 2)
-        self.assertEqual(
-            history_thread.frames[1].GetLineEntry().GetFileSpec().GetFilename(),
-            "main.c",
-        )
-
-        history_thread = threads.GetThreadAtIndex(1)
-        self.assertTrue(history_thread.num_frames >= 2)
-        self.assertEqual(
-            history_thread.frames[1].GetLineEntry().GetFileSpec().GetFilename(),
-            "main.c",
-        )
-
-        # let's free the container (SBThreadCollection) and see if the
-        # SBThreads still live
-        threads = None
-        self.assertTrue(history_thread.num_frames >= 2)
-        self.assertEqual(
-            history_thread.frames[1].GetLineEntry().GetFileSpec().GetFilename(),
-            "main.c",
-        )
-
     def asan_tests(self):
         target = self.createTestTarget()
 
diff --git a/lldb/test/API/functionalities/asan/TestReportData.py b/lldb/test/API/functionalities/asan/TestReportData.py
index de0c1206a57ad..543c5fe66a208 100644
--- a/lldb/test/API/functionalities/asan/TestReportData.py
+++ b/lldb/test/API/functionalities/asan/TestReportData.py
@@ -9,7 +9,6 @@
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
 
-from functionalities.libsanitizers.util import no_libsanitizers
 
 class AsanTestReportDataCase(TestBase):
     @skipIfFreeBSD  # llvm.org/pr21136 runtimes not yet available by default
@@ -17,14 +16,9 @@ class AsanTestReportDataCase(TestBase):
     @skipUnlessAddressSanitizer
     @skipIf(archs=["i386"], bugnumber="llvm.org/PR36710")
     def test(self):
-        self.build(make_targets=["asan"])
+        self.build()
         self.asan_tests()
 
-    @skipIf(oslist=no_match(["macosx"]))
-    def test_libsanitizers_asan(self):
-        self.build(make_targets=["libsanitizers"])
-        self.asan_tests(libsanitizers=True)
-
     def setUp(self):
         # Call super's setUp().
         TestBase.setUp(self)
@@ -35,18 +29,10 @@ def setUp(self):
         self.line_crash = line_number("main.c", "// BOOM line")
         self.col_crash = 16
 
-    def asan_tests(self, libsanitizers=False):
+    def asan_tests(self):
         target = self.createTestTarget()
 
-        if libsanitizers and no_libsanitizers(self):
-            self.skipTest("libsanitizers not found")
-
-        if libsanitizers:
-            self.runCmd(
-                "env SanitizersAddress=1 MallocSanitizerZone=1 MallocSecureAllocator=0"
-            )
-        else:
-            self.registerSanitizerLibrariesWithTarget(target)
+        self.registerSanitizerLibrariesWithTarget(target)
 
         self.runCmd("run")
 
diff --git a/lldb/test/API/functionalities/libsanitizers/util.py b/lldb/test/API/functionalities/libsanitizers/util.py
deleted file mode 100644
index ad68541aba8d0..0000000000000
--- a/lldb/test/API/functionalities/libsanitizers/util.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def no_libsanitizers(testbase):
-    testbase.runCmd("image list libsystem_sanitizers.dylib", check=False)
-    return not "libsystem_sanitizers.dylib" in testbase.res.GetOutput()

From 8cee94e989b5bf6fb6455087d48eb6c6e0e23c54 Mon Sep 17 00:00:00 2001
From: Harald van Dijk <harald.vandijk@codeplay.com>
Date: Tue, 16 Apr 2024 16:32:57 +0100
Subject: [PATCH 11/58] [RISCV] Fix obvious copy paste error.

CASE_VFMA_OPCODE_VV and CASE_VFMA_CHANGE_OPCODE_VV need to match up if we are
to avoid "Unexpected opcode" errors, but in CASE_VFMA_CHANGE_OPCODE_VV,
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2 had mistakenly been used instead of
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 668062c8d33f6..14b5cbea71722 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3016,7 +3016,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
   CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
 
 #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP)                               \
-  CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16)                     \
+  CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16)                     \
   CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32)                     \
   CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
 

From 51b42b762112f2e77d032efd16fa0d9d31cde494 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Tue, 16 Apr 2024 17:45:02 +0200
Subject: [PATCH 12/58] Revert "[Sema] Mark alias/ifunc targets used and
 consider mangled names" (#88919)

Reverts llvm/llvm-project#87130

Bot is broken with clang crash:
https://lab.llvm.org/buildbot/#/builders/272/builds/14063/steps/6/logs/stdio
---
 clang/lib/Sema/CMakeLists.txt                 |  1 -
 clang/lib/Sema/SemaDeclAttr.cpp               | 44 +++++--------------
 clang/test/AST/ast-dump-attr-json.cpp         |  1 -
 clang/test/Sema/alias-unused-win.cpp          |  2 +-
 clang/test/Sema/alias-unused.cpp              | 16 +++----
 .../llvm-project-overlay/clang/BUILD.bazel    |  1 -
 6 files changed, 19 insertions(+), 46 deletions(-)

diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index a96439df66422..ab3b813a9ccd9 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -1,6 +1,5 @@
 set(LLVM_LINK_COMPONENTS
   Core
-  Demangle
   FrontendHLSL
   FrontendOpenMP
   MC
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index d26f130b5774c..b7b1fbc625a15 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -45,7 +45,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/Assumptions.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/Error.h"
@@ -1984,36 +1983,6 @@ static void handleWeakRefAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) WeakRefAttr(S.Context, AL));
 }
 
-// Mark alias/ifunc target as used. Due to name mangling, we look up the
-// demangled name ignoring parameters (not supported by microsoftDemangle
-// https://github.com/llvm/llvm-project/issues/88825). This should handle the
-// majority of use cases while leaving namespace scope names unmarked.
-static void markUsedForAliasOrIfunc(Sema &S, Decl *D, const ParsedAttr &AL,
-                                    StringRef Str) {
-  std::unique_ptr<char, llvm::FreeDeleter> Demangled;
-  if (S.getASTContext().getCXXABIKind() != TargetCXXABI::Microsoft)
-    Demangled.reset(llvm::itaniumDemangle(Str, /*ParseParams=*/false));
-  std::unique_ptr<MangleContext> MC(S.Context.createMangleContext());
-  SmallString<256> Name;
-
-  const DeclarationNameInfo Target(
-      &S.Context.Idents.get(Demangled ? Demangled.get() : Str), AL.getLoc());
-  LookupResult LR(S, Target, Sema::LookupOrdinaryName);
-  if (S.LookupName(LR, S.TUScope)) {
-    for (NamedDecl *ND : LR) {
-      if (MC->shouldMangleDeclName(ND)) {
-        llvm::raw_svector_ostream Out(Name);
-        Name.clear();
-        MC->mangleName(GlobalDecl(ND), Out);
-      } else {
-        Name = ND->getIdentifier()->getName();
-      }
-      if (Name == Str)
-        ND->markUsed(S.Context);
-    }
-  }
-}
-
 static void handleIFuncAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   StringRef Str;
   if (!S.checkStringLiteralArgumentAttr(AL, 0, Str))
@@ -2026,7 +1995,6 @@ static void handleIFuncAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  markUsedForAliasOrIfunc(S, D, AL, Str);
   D->addAttr(::new (S.Context) IFuncAttr(S.Context, AL, Str));
 }
 
@@ -2061,7 +2029,17 @@ static void handleAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     }
   }
 
-  markUsedForAliasOrIfunc(S, D, AL, Str);
+  // Mark target used to prevent unneeded-internal-declaration warnings.
+  if (!S.LangOpts.CPlusPlus) {
+    // FIXME: demangle Str for C++, as the attribute refers to the mangled
+    // linkage name, not the pre-mangled identifier.
+    const DeclarationNameInfo target(&S.Context.Idents.get(Str), AL.getLoc());
+    LookupResult LR(S, target, Sema::LookupOrdinaryName);
+    if (S.LookupQualifiedName(LR, S.getCurLexicalContext()))
+      for (NamedDecl *ND : LR)
+        ND->markUsed(S.Context);
+  }
+
   D->addAttr(::new (S.Context) AliasAttr(S.Context, AL, Str));
 }
 
diff --git a/clang/test/AST/ast-dump-attr-json.cpp b/clang/test/AST/ast-dump-attr-json.cpp
index 883e584bfedf0..051c2956abfdf 100644
--- a/clang/test/AST/ast-dump-attr-json.cpp
+++ b/clang/test/AST/ast-dump-attr-json.cpp
@@ -46,7 +46,6 @@ __thread __attribute__ ((tls_model ("local-exec"))) int tls_model_var;
 // CHECK-NEXT:    "tokLen": 11
 // CHECK-NEXT:   }
 // CHECK-NEXT:  },
-// CHECK-NEXT:  "isUsed": true,
 // CHECK-NEXT:  "name": "global_decl",
 // CHECK-NEXT:  "mangledName": "global_decl",
 // CHECK-NEXT:  "type": {
diff --git a/clang/test/Sema/alias-unused-win.cpp b/clang/test/Sema/alias-unused-win.cpp
index 97d57a3bbd1e3..47c96d4117517 100644
--- a/clang/test/Sema/alias-unused-win.cpp
+++ b/clang/test/Sema/alias-unused-win.cpp
@@ -7,7 +7,7 @@ extern "C" {
 static int f(void) { return 42; } // cxx-warning{{unused function 'f'}}
 int g(void) __attribute__((alias("f")));
 
-static int foo [] = { 42, 0xDEAD };
+static int foo [] = { 42, 0xDEAD }; // cxx-warning{{variable 'foo' is not needed and will not be emitted}}
 extern typeof(foo) bar __attribute__((unused, alias("foo")));
 
 static int __attribute__((overloadable)) f0(int x) { return x; } // expected-warning{{unused function 'f0'}}
diff --git a/clang/test/Sema/alias-unused.cpp b/clang/test/Sema/alias-unused.cpp
index c0b541c880e52..dc8e46f072d74 100644
--- a/clang/test/Sema/alias-unused.cpp
+++ b/clang/test/Sema/alias-unused.cpp
@@ -14,26 +14,24 @@ extern typeof(foo) bar __attribute__((unused, alias("foo")));
 /// We report a warning in C++ mode because the internal linkage `resolver` gets
 /// mangled as it does not have a language linkage. GCC does not mangle
 /// `resolver` or report a warning.
-static int (*resolver(void))(void) { return f; } // cxx-warning{{unused function 'resolver'}}
+static int (*resolver(void))(void) { return f; } // expected-warning{{unused function 'resolver'}}
 int ifunc(void) __attribute__((ifunc("resolver")));
 
-static int __attribute__((overloadable)) f0(int x) { return x; }
+static int __attribute__((overloadable)) f0(int x) { return x; } // expected-warning{{unused function 'f0'}}
 static float __attribute__((overloadable)) f0(float x) { return x; } // expected-warning{{unused function 'f0'}}
 int g0(void) __attribute__((alias("_ZL2f0i")));
 
 #ifdef __cplusplus
-static int f1() { return 42; }
+static int f1() { return 42; } // expected-warning{{unused function 'f1'}}
 int g1(void) __attribute__((alias("_ZL2f1v")));
 }
 
-/// We demangle alias/ifunc target and mark all found functions as used.
-
-static int f2(int) { return 42; } // cxx-warning{{unused function 'f2'}}
-static int f2() { return 42; }
+static int f2(int) { return 42; } // expected-warning{{unused function 'f2'}}
+static int f2() { return 42; } // expected-warning{{unused function 'f2'}}
 int g2() __attribute__((alias("_ZL2f2v")));
 
-static int (*resolver1())() { return f; } // cxx-warning{{unused function 'resolver1'}}
-static int (*resolver1(int))() { return f; }
+static int (*resolver1())() { return f; } // expected-warning{{unused function 'resolver1'}}
+static int (*resolver1(int))() { return f; } // expected-warning{{unused function 'resolver1'}}
 int ifunc1() __attribute__((ifunc("_ZL9resolver1i")));
 
 /// TODO: We should report "unused function" for f3(int).
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 725ac6bb38120..c2f77e3abca0e 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -1136,7 +1136,6 @@ cc_library(
         "//llvm:AllTargetsAsmParsers",
         "//llvm:AllTargetsCodeGens",
         "//llvm:Core",
-        "//llvm:Demangle",
         "//llvm:FrontendHLSL",
         "//llvm:FrontendOpenMP",
         "//llvm:MC",

From 9d111286322ec99b32467eef3aeec6b588c49f18 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Tue, 16 Apr 2024 16:48:59 +0100
Subject: [PATCH 13/58] [libclc] Improve clarity of CMake foreach. NFC.

Should be a bit easier to read.
---
 libclc/CMakeLists.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index ed2764847e709..f605c3bbbe9dc 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -50,7 +50,7 @@ if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DI
   endif()
 
   # Import required tools as targets
-  foreach( tool clang llvm-as llvm-link opt )
+  foreach( tool IN ITEMS clang llvm-as llvm-link opt )
     find_program( LLVM_TOOL_${tool} ${tool} PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH )
     add_executable( libclc::${tool} IMPORTED GLOBAL )
     set_target_properties( libclc::${tool} PROPERTIES IMPORTED_LOCATION ${LLVM_TOOL_${tool}} )
@@ -68,7 +68,7 @@ else()
     message(FATAL_ERROR "Clang is not enabled, but is required to build libclc in-tree")
   endif()
 
-  foreach( tool clang llvm-as llvm-link opt )
+  foreach( tool IN ITEMS clang llvm-as llvm-link opt )
     add_executable(libclc::${tool} ALIAS ${tool})
   endforeach()
 endif()
@@ -181,7 +181,7 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTAL
 install( DIRECTORY generic/include/clc DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" )
 
 if( ENABLE_RUNTIME_SUBNORMAL )
-  foreach( file subnormal_use_default subnormal_disable )
+  foreach( file IN ITEMS subnormal_use_default subnormal_disable )
     link_bc(
        TARGET ${file}
        INPUTS ${PROJECT_SOURCE_DIR}/generic/lib/${file}.ll

From 2704ebaf2885a16155ab7144f8dd0dd459d77089 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin@nvidia.com>
Date: Tue, 16 Apr 2024 08:53:12 -0700
Subject: [PATCH 14/58] [flang][runtime] Create CUDA PTX OBJECT library target
 for F18 runtime CUDA build. (#88821)

This is to experiment with distributing the F18 runtime CUDA library
in the form of a pure PTX library. The change is guarded by the
FLANG_EXPERIMENTAL_CUDA_RUNTIME CMake variable.
---
 flang/cmake/modules/AddFlangOffloadRuntime.cmake | 6 +++++-
 flang/lib/Decimal/CMakeLists.txt                 | 2 +-
 flang/runtime/CMakeLists.txt                     | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
index 6fb6213e90fc4..e34d3851187ac 100644
--- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake
+++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
@@ -10,7 +10,7 @@ set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
 set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
   "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")
 
-macro(enable_cuda_compilation files)
+macro(enable_cuda_compilation name files)
   if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
     if (BUILD_SHARED_LIBS)
       message(FATAL_ERROR
@@ -52,6 +52,10 @@ macro(enable_cuda_compilation files)
       include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include)
       add_compile_definitions(RT_USE_LIBCUDACXX=1)
     endif()
+
+    # Add an OBJECT library consisting of CUDA PTX.
+    llvm_add_library(${name}PTX OBJECT PARTIAL_SOURCES_INTENDED ${files})
+    set_property(TARGET obj.${name}PTX PROPERTY CUDA_PTX_COMPILATION ON)
   endif()
 endmacro()
 
diff --git a/flang/lib/Decimal/CMakeLists.txt b/flang/lib/Decimal/CMakeLists.txt
index 3d562b8e3ce1e..880b190f1c581 100644
--- a/flang/lib/Decimal/CMakeLists.txt
+++ b/flang/lib/Decimal/CMakeLists.txt
@@ -55,7 +55,7 @@ set(sources
 )
 
 include(AddFlangOffloadRuntime)
-enable_cuda_compilation("${sources}")
+enable_cuda_compilation(FortranDecimal "${sources}")
 enable_omp_offload_compilation("${sources}")
 
 add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN ${sources})
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 2a65a22ab674c..bdd0e07bbfd4d 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -224,7 +224,7 @@ set(supported_files
   utf.cpp
   )
 
-enable_cuda_compilation("${supported_files}")
+enable_cuda_compilation(FortranRuntime "${supported_files}")
 enable_omp_offload_compilation("${supported_files}")
 
 if (NOT TARGET FortranFloat128Math)

From a79783d7ad00c4c10c30f637f4bf13551e47f3dd Mon Sep 17 00:00:00 2001
From: Robin Caloudis <robin.caloudis@gmx.de>
Date: Tue, 16 Apr 2024 17:58:11 +0200
Subject: [PATCH 15/58] [libc][fenv] Use proxy header (#88787)

Include the types `fexcept_t` and `fenv_t` from their corresponding proxy
headers, which have been available since
https://github.com/llvm/llvm-project/pull/88467.
---
 libc/src/fenv/fegetexceptflag.h | 2 +-
 libc/src/fenv/fesetexceptflag.h | 2 +-
 libc/src/fenv/feupdateenv.cpp   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/fenv/fegetexceptflag.h b/libc/src/fenv/fegetexceptflag.h
index ad72161e536f8..fcb9598658d43 100644
--- a/libc/src/fenv/fegetexceptflag.h
+++ b/libc/src/fenv/fegetexceptflag.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_FENV_FEGETEXCEPTFLAG_H
 #define LLVM_LIBC_SRC_FENV_FEGETEXCEPTFLAG_H
 
-#include <fenv.h>
+#include "hdr/types/fexcept_t.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/fenv/fesetexceptflag.h b/libc/src/fenv/fesetexceptflag.h
index 15e62eda1b840..a018358dc9dfc 100644
--- a/libc/src/fenv/fesetexceptflag.h
+++ b/libc/src/fenv/fesetexceptflag.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_FENV_FESETEXCEPTFLAG_H
 #define LLVM_LIBC_SRC_FENV_FESETEXCEPTFLAG_H
 
-#include <fenv.h>
+#include "hdr/types/fexcept_t.h"
 
 namespace LIBC_NAMESPACE {
 
diff --git a/libc/src/fenv/feupdateenv.cpp b/libc/src/fenv/feupdateenv.cpp
index 7e81b9476da91..0664863538155 100644
--- a/libc/src/fenv/feupdateenv.cpp
+++ b/libc/src/fenv/feupdateenv.cpp
@@ -10,7 +10,7 @@
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/common.h"
 
-#include <fenv.h>
+#include "hdr/types/fenv_t.h"
 
 namespace LIBC_NAMESPACE {
 

From 38895e6578f7728cfb3d41d0880a0e9b358d9afd Mon Sep 17 00:00:00 2001
From: Robin Caloudis <robin.caloudis@gmx.de>
Date: Tue, 16 Apr 2024 17:58:56 +0200
Subject: [PATCH 16/58] [libc][fenv] Remove unnecessary dependencies (#88788)

Remove the fenv macro dependency from the CMake files as the underlying targets
do not make use of it. Note that the
[corresponding Bazel targets](https://github.com/llvm/llvm-project/blob/main/utils/bazel/llvm-project-overlay/libc/BUILD.bazel#L1138-L1288)
do not need a matching change.
---
 libc/src/fenv/CMakeLists.txt | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/libc/src/fenv/CMakeLists.txt b/libc/src/fenv/CMakeLists.txt
index a28a7ca4c2d82..17e9947412062 100644
--- a/libc/src/fenv/CMakeLists.txt
+++ b/libc/src/fenv/CMakeLists.txt
@@ -17,7 +17,6 @@ add_entrypoint_object(
   HDRS
     fesetround.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2
@@ -30,7 +29,6 @@ add_entrypoint_object(
   HDRS
     feclearexcept.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2
@@ -43,7 +41,6 @@ add_entrypoint_object(
   HDRS
     feraiseexcept.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2
@@ -56,7 +53,6 @@ add_entrypoint_object(
   HDRS
     fetestexcept.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2
@@ -69,7 +65,6 @@ add_entrypoint_object(
   HDRS
     fegetenv.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.hdr.types.fenv_t
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
@@ -83,7 +78,6 @@ add_entrypoint_object(
   HDRS
     fesetenv.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.hdr.types.fenv_t
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
@@ -111,7 +105,6 @@ add_entrypoint_object(
   HDRS
     fesetexcept.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2
@@ -166,7 +159,6 @@ add_entrypoint_object(
   HDRS
     feenableexcept.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2
@@ -179,7 +171,6 @@ add_entrypoint_object(
   HDRS
     fedisableexcept.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2
@@ -192,7 +183,6 @@ add_entrypoint_object(
   HDRS
     fegetexcept.h
   DEPENDS
-    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.fenv_impl
   COMPILE_OPTIONS
     -O2

From 1c6b0f779f66494cb597884c1a52e377bde4bc54 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer@sony.com>
Date: Tue, 16 Apr 2024 17:07:46 +0100
Subject: [PATCH 17/58] [RemoveDI] Add support for debug records to debugify
 (#87383)

This patch changes debugify to support debug variable records, and
consequently to no longer convert modules automatically to intrinsics
when entering debugify.
---
 llvm/lib/CodeGen/MachineDebugify.cpp          |  18 ++-
 llvm/lib/Transforms/Utils/Debugify.cpp        | 111 +++++++-----------
 .../check-line-and-variables-x.mir            |   1 +
 .../MIRDebugify/check-line-and-variables.ll   |   1 +
 .../MIRDebugify/check-line-and-variables.mir  |   2 +
 .../MIRDebugify/locations-and-values.mir      |   4 +
 .../MIRDebugify/multifunction-module.mir      |   3 +-
 .../DebugInfo/debugify-bogus-dbg-value.ll     |   1 +
 llvm/test/DebugInfo/debugify-each.ll          |  34 ++++++
 llvm/test/DebugInfo/debugify-export.ll        |   3 +
 llvm/test/DebugInfo/debugify-ignore-phi.ll    |   1 +
 .../debugify-original-no-dbg-info.ll          |   1 +
 .../debugify-report-missing-locs-only.ll      |   1 +
 llvm/test/DebugInfo/debugify.ll               |  27 +++++
 llvm/test/DebugInfo/pr37964.ll                |   1 +
 .../test/DebugInfo/salvage-cast-debug-info.ll |   2 +-
 llvm/test/DebugInfo/verify-di-preserve.ll     |   4 +-
 17 files changed, 142 insertions(+), 73 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp
index c264e199cf472..bffdd51bfbca7 100644
--- a/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -65,6 +65,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
   // all the others.
   Function *DbgValF = M.getFunction("llvm.dbg.value");
   DbgValueInst *EarliestDVI = nullptr;
+  DbgVariableRecord *EarliestDVR = nullptr;
   DenseMap<unsigned, DILocalVariable *> Line2Var;
   DIExpression *Expr = nullptr;
   if (DbgValF) {
@@ -80,6 +81,20 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
       Expr = DVI->getExpression();
     }
   }
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
+        if (!DVR.isDbgValue())
+          continue;
+        unsigned Line = DVR.getDebugLoc().getLine();
+        assert(Line != 0 && "debugify should not insert line 0 locations");
+        Line2Var[Line] = DVR.getVariable();
+        if (!EarliestDVR || Line < EarliestDVR->getDebugLoc().getLine())
+          EarliestDVR = &DVR;
+        Expr = DVR.getExpression();
+      }
+    }
+  }
   if (Line2Var.empty())
     return true;
 
@@ -109,7 +124,8 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
       // Find a suitable local variable for the DBG_VALUE.
       unsigned Line = MI.getDebugLoc().getLine();
       if (!Line2Var.count(Line))
-        Line = EarliestDVI->getDebugLoc().getLine();
+        Line = EarliestDVI ? EarliestDVI->getDebugLoc().getLine()
+                           : EarliestDVR->getDebugLoc().getLine();
       DILocalVariable *LocalVar = Line2Var[Line];
       assert(LocalVar && "No variable for current line?");
       VarSet.insert(LocalVar);
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 200bad22148f0..fcc82eadac36c 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -87,10 +87,6 @@ bool llvm::applyDebugifyMetadata(
     return false;
   }
 
-  bool NewDebugMode = M.IsNewDbgInfoFormat;
-  if (NewDebugMode)
-    M.convertFromNewDbgValues();
-
   DIBuilder DIB(M);
   LLVMContext &Ctx = M.getContext();
   auto *Int32Ty = Type::getInt32Ty(Ctx);
@@ -214,9 +210,6 @@ bool llvm::applyDebugifyMetadata(
   if (!M.getModuleFlag(DIVersionKey))
     M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION);
 
-  if (NewDebugMode)
-    M.convertToNewDbgValues();
-
   return true;
 }
 
@@ -311,10 +304,6 @@ bool llvm::collectDebugInfoMetadata(Module &M,
     return false;
   }
 
-  bool NewDebugMode = M.IsNewDbgInfoFormat;
-  if (NewDebugMode)
-    M.convertFromNewDbgValues();
-
   uint64_t FunctionsCnt = DebugInfoBeforePass.DIFunctions.size();
   // Visit each instruction.
   for (Function &F : Functions) {
@@ -349,20 +338,23 @@ bool llvm::collectDebugInfoMetadata(Module &M,
 
         // Cllect dbg.values and dbg.declare.
         if (DebugifyLevel > Level::Locations) {
-          if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+          auto HandleDbgVariable = [&](auto *DbgVar) {
             if (!SP)
-              continue;
+              return;
             // Skip inlined variables.
-            if (I.getDebugLoc().getInlinedAt())
-              continue;
+            if (DbgVar->getDebugLoc().getInlinedAt())
+              return;
             // Skip undef values.
-            if (DVI->isKillLocation())
-              continue;
+            if (DbgVar->isKillLocation())
+              return;
 
-            auto *Var = DVI->getVariable();
+            auto *Var = DbgVar->getVariable();
             DebugInfoBeforePass.DIVariables[Var]++;
-            continue;
-          }
+          };
+          for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+            HandleDbgVariable(&DVR);
+          if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
+            HandleDbgVariable(DVI);
         }
 
         // Skip debug instructions other than dbg.value and dbg.declare.
@@ -379,9 +371,6 @@ bool llvm::collectDebugInfoMetadata(Module &M,
     }
   }
 
-  if (NewDebugMode)
-    M.convertToNewDbgValues();
-
   return true;
 }
 
@@ -561,10 +550,6 @@ bool llvm::checkDebugInfoMetadata(Module &M,
     return false;
   }
 
-  bool NewDebugMode = M.IsNewDbgInfoFormat;
-  if (NewDebugMode)
-    M.convertFromNewDbgValues();
-
   // Map the debug info holding DIs after a pass.
   DebugInfoPerPass DebugInfoAfterPass;
 
@@ -599,20 +584,23 @@ bool llvm::checkDebugInfoMetadata(Module &M,
 
         // Collect dbg.values and dbg.declares.
         if (DebugifyLevel > Level::Locations) {
-          if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+          auto HandleDbgVariable = [&](auto *DbgVar) {
             if (!SP)
-              continue;
+              return;
             // Skip inlined variables.
-            if (I.getDebugLoc().getInlinedAt())
-              continue;
+            if (DbgVar->getDebugLoc().getInlinedAt())
+              return;
             // Skip undef values.
-            if (DVI->isKillLocation())
-              continue;
+            if (DbgVar->isKillLocation())
+              return;
 
-            auto *Var = DVI->getVariable();
+            auto *Var = DbgVar->getVariable();
             DebugInfoAfterPass.DIVariables[Var]++;
-            continue;
-          }
+          };
+          for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+            HandleDbgVariable(&DVR);
+          if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
+            HandleDbgVariable(DVI);
         }
 
         // Skip debug instructions other than dbg.value and dbg.declare.
@@ -675,16 +663,14 @@ bool llvm::checkDebugInfoMetadata(Module &M,
   // the debugging information from the previous pass.
   DebugInfoBeforePass = DebugInfoAfterPass;
 
-  if (NewDebugMode)
-    M.convertToNewDbgValues();
-
   LLVM_DEBUG(dbgs() << "\n\n");
   return Result;
 }
 
 namespace {
-/// Return true if a mis-sized diagnostic is issued for \p DVI.
-bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
+/// Return true if a mis-sized diagnostic is issued for \p DbgVal.
+template <typename DbgValTy>
+bool diagnoseMisSizedDbgValue(Module &M, DbgValTy *DbgVal) {
   // The size of a dbg.value's value operand should match the size of the
   // variable it corresponds to.
   //
@@ -693,22 +679,22 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
 
   // For now, don't try to interpret anything more complicated than an empty
   // DIExpression. Eventually we should try to handle OP_deref and fragments.
-  if (DVI->getExpression()->getNumElements())
+  if (DbgVal->getExpression()->getNumElements())
     return false;
 
-  Value *V = DVI->getVariableLocationOp(0);
+  Value *V = DbgVal->getVariableLocationOp(0);
   if (!V)
     return false;
 
   Type *Ty = V->getType();
   uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty);
-  std::optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits();
+  std::optional<uint64_t> DbgVarSize = DbgVal->getFragmentSizeInBits();
   if (!ValueOperandSize || !DbgVarSize)
     return false;
 
   bool HasBadSize = false;
   if (Ty->isIntegerTy()) {
-    auto Signedness = DVI->getVariable()->getSignedness();
+    auto Signedness = DbgVal->getVariable()->getSignedness();
     if (Signedness && *Signedness == DIBasicType::Signedness::Signed)
       HasBadSize = ValueOperandSize < *DbgVarSize;
   } else {
@@ -718,7 +704,7 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
   if (HasBadSize) {
     dbg() << "ERROR: dbg.value operand has size " << ValueOperandSize
           << ", but its variable has size " << *DbgVarSize << ": ";
-    DVI->print(dbg());
+    DbgVal->print(dbg());
     dbg() << "\n";
   }
   return HasBadSize;
@@ -735,10 +721,6 @@ bool checkDebugifyMetadata(Module &M,
     return false;
   }
 
-  bool NewDebugMode = M.IsNewDbgInfoFormat;
-  if (NewDebugMode)
-    M.convertFromNewDbgValues();
-
   auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
     return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
         ->getZExtValue();
@@ -780,18 +762,23 @@ bool checkDebugifyMetadata(Module &M,
     }
 
     // Find missing variables and mis-sized debug values.
-    for (Instruction &I : instructions(F)) {
-      auto *DVI = dyn_cast<DbgValueInst>(&I);
-      if (!DVI)
-        continue;
-
+    auto CheckForMisSized = [&](auto *DbgVal) {
       unsigned Var = ~0U;
-      (void)to_integer(DVI->getVariable()->getName(), Var, 10);
+      (void)to_integer(DbgVal->getVariable()->getName(), Var, 10);
       assert(Var <= OriginalNumVars && "Unexpected name for DILocalVariable");
-      bool HasBadSize = diagnoseMisSizedDbgValue(M, DVI);
+      bool HasBadSize = diagnoseMisSizedDbgValue(M, DbgVal);
       if (!HasBadSize)
         MissingVars.reset(Var - 1);
       HasErrors |= HasBadSize;
+    };
+    for (Instruction &I : instructions(F)) {
+      for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+        if (DVR.isDbgValue() || DVR.isDbgAssign())
+          CheckForMisSized(&DVR);
+      auto *DVI = dyn_cast<DbgValueInst>(&I);
+      if (!DVI)
+        continue;
+      CheckForMisSized(DVI);
     }
   }
 
@@ -820,9 +807,6 @@ bool checkDebugifyMetadata(Module &M,
   if (Strip)
     Ret = stripDebugifyMetadata(M);
 
-  if (NewDebugMode)
-    M.convertToNewDbgValues();
-
   return Ret;
 }
 
@@ -1052,10 +1036,6 @@ FunctionPass *createCheckDebugifyFunctionPass(
 
 PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
                                               ModuleAnalysisManager &) {
-  bool NewDebugMode = M.IsNewDbgInfoFormat;
-  if (NewDebugMode)
-    M.convertFromNewDbgValues();
-
   if (Mode == DebugifyMode::SyntheticDebugInfo)
     checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
                                    "CheckModuleDebugify", Strip, StatsMap);
@@ -1065,9 +1045,6 @@ PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
       "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
       OrigDIVerifyBugsReportFilePath);
 
-  if (NewDebugMode)
-    M.convertToNewDbgValues();
-
   return PreservedAnalyses::all();
 }
 
diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir
index eaa627966347f..40ea01189f2cd 100644
--- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir
+++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir
@@ -1,5 +1,6 @@
 # REQUIRES: x86-registered-target
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-check-debugify -o - %s 2>&1 | FileCheck %s
+# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-check-debugify -o - %s 2>&1 | FileCheck %s
 --- |
   ; ModuleID = 'check-line-and-variables.mir'
   source_filename = "check-line-and-variables.c"
diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll
index 9033fd2f147c4..56c7cf45705a7 100644
--- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll
+++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -debugify-check-and-strip-all-safe -o - %s 2>&1 | FileCheck %s
+; RUN: llc --experimental-debuginfo-iterators=false -debugify-check-and-strip-all-safe -o - %s 2>&1 | FileCheck %s
 
 ; ModuleID = 'main.c'
 source_filename = "main.c"
diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir
index 9eb722258b703..0805a7f4cfc6c 100644
--- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir
+++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir
@@ -1,6 +1,8 @@
 # REQUIRES: x86-registered-target
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,dead-mi-elimination,mir-check-debugify -o - %s 2>&1 | FileCheck %s
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-PASS
+# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,dead-mi-elimination,mir-check-debugify -o - %s 2>&1 | FileCheck %s
+# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-PASS
 --- |
   ; ModuleID = 'check-line-and-variables.mir'
   source_filename = "check-line-and-variables.ll"
diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir
index 59dcff9efd4d5..3035fb8eab3f8 100644
--- a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir
+++ b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir
@@ -2,6 +2,10 @@
 # RUN: llc -run-pass=mir-debugify -debugify-level=locations -o - %s | FileCheck --check-prefixes=ALL --implicit-check-not=dbg.value %s
 # RUN: llc -run-pass=mir-debugify,mir-strip-debug,mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s
 # RUN: llc -run-pass=mir-debugify,mir-strip-debug -o - %s | FileCheck --check-prefix=STRIP %s
+# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s
+# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify -debugify-level=locations -o - %s | FileCheck --check-prefixes=ALL --implicit-check-not=dbg.value %s
+# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-strip-debug,mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s
+# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-strip-debug -o - %s | FileCheck --check-prefix=STRIP %s
 
 --- |
   ; ModuleID = 'loc-only.ll'
diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir b/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir
index fe4fcc1a15bb8..8079db926e1b0 100644
--- a/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir
+++ b/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir
@@ -1,6 +1,5 @@
-# FIXME: Remove rm after a few weeks.
-# RUN: rm -f %S/multifunction-module.s
 # RUN: llc -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s
+# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s
 
 # CHECK: Machine IR debug info check: PASS
 # CHECK-NOT: Assertion `Var <= NumVars && "Unexpected name for DILocalVariable"'
diff --git a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll
index 4990979f10c53..55e436b1a93b2 100644
--- a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll
+++ b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -passes=check-debugify < %s 2>&1 | FileCheck %s
+; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s 2>&1 | FileCheck %s
 
 define <2 x i64> @test-fun(<2 x i64> %A) !dbg !6 {
   %and = and <2 x i64> %A, <i64 23, i64 42>, !dbg !14
diff --git a/llvm/test/DebugInfo/debugify-each.ll b/llvm/test/DebugInfo/debugify-each.ll
index e9241dedb6960..7685b57b5dd15 100644
--- a/llvm/test/DebugInfo/debugify-each.ll
+++ b/llvm/test/DebugInfo/debugify-each.ll
@@ -40,6 +40,40 @@
 ; RUN: opt -debugify-each -passes=globalopt -S -o /dev/null < %s 2> %t
 ; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS-ONE
 
+; Repeat the same checks with debug intrinsics enabled.
+; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -O3 -S -o /dev/null < %s 2> %t
+; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS
+; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS
+; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-each -passes='default<O3>' %s 2> %t
+; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS
+; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS
+
+; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -debugify-each -O3 -S -o /dev/null < %s 2> %t
+; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS
+; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS
+
+; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes='instrprof,instrprof,sroa,sccp' -S -o /dev/null < %s 2> %t
+; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS
+; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS
+
+; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -O1 < %s | opt -O2 -o /dev/null
+
+; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-quiet -debugify-each -O1 < %s 2>&1 | count 0
+
+; RUN: opt --experimental-debuginfo-iterators=false -O1 < %s -S -o %t.before
+; RUN: opt --experimental-debuginfo-iterators=false -O1 -debugify-each < %s -S -o %t.after
+; RUN: diff %t.before %t.after
+
+; RUN: opt --experimental-debuginfo-iterators=false -O1 < %s | llvm-dis -o %t.before
+; RUN: opt --experimental-debuginfo-iterators=false -O1 -debugify-each < %s | llvm-dis -o %t.after
+; RUN: diff %t.before %t.after
+
+; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes=instsimplify -S -o /dev/null < %s 2> %t
+; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS-ONE
+
+; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes=globalopt -S -o /dev/null < %s 2> %t
+; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS-ONE
+
 define void @foo(i32 %arg) {
   call i32 asm "bswap $0", "=r,r"(i32 %arg)
   ret void
diff --git a/llvm/test/DebugInfo/debugify-export.ll b/llvm/test/DebugInfo/debugify-export.ll
index 6e5952d433da9..30333ca908b0d 100644
--- a/llvm/test/DebugInfo/debugify-export.ll
+++ b/llvm/test/DebugInfo/debugify-export.ll
@@ -1,6 +1,9 @@
 ; RUN: opt %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s
 ; RUN: opt %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s
 
+; RUN: opt --experimental-debuginfo-iterators=false %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s
+; RUN: opt --experimental-debuginfo-iterators=false %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s
+
 ; CHECK: Pass Name
 ; CHECK-SAME: # of missing debug values
 ; CHECK-SAME: # of missing locations
diff --git a/llvm/test/DebugInfo/debugify-ignore-phi.ll b/llvm/test/DebugInfo/debugify-ignore-phi.ll
index 322ccafa22ac8..643df1d960485 100644
--- a/llvm/test/DebugInfo/debugify-ignore-phi.ll
+++ b/llvm/test/DebugInfo/debugify-ignore-phi.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -passes=check-debugify < %s -S 2>&1 | FileCheck %s
+; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s -S 2>&1 | FileCheck %s
 
 define void @test_phi(i1 %cond) !dbg !6 {
   br i1 %cond, label %1, label %2, !dbg !11
diff --git a/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll b/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll
index 941b294fb8556..4cbbfc5c215e2 100644
--- a/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll
+++ b/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -verify-debuginfo-preserve -passes=instcombine -S -o - < %s 2>&1 | FileCheck %s
+; RUN: opt --experimental-debuginfo-iterators=false -verify-debuginfo-preserve -passes=instcombine -S -o - < %s 2>&1 | FileCheck %s
 
 ; CHECK: ModuleDebugify (original debuginfo): Skipping module without debug info
 ; CHECK-NEXT: CheckModuleDebugify (original debuginfo): Skipping module without debug info
diff --git a/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll b/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll
index 1c5daa19c6484..04b7636f025a0 100644
--- a/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll
+++ b/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -passes=check-debugify < %s -S -o - 2>&1 | FileCheck %s -implicit-check-not "WARNING: Instruction with empty DebugLoc in function bar"
+; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s -S -o - 2>&1 | FileCheck %s -implicit-check-not "WARNING: Instruction with empty DebugLoc in function bar"
 
 ; CHECK: WARNING: Instruction with empty DebugLoc in function foo --   ret void
 define void @foo() !dbg !6 {
diff --git a/llvm/test/DebugInfo/debugify.ll b/llvm/test/DebugInfo/debugify.ll
index 5ce6795d41b6b..191015f825933 100644
--- a/llvm/test/DebugInfo/debugify.ll
+++ b/llvm/test/DebugInfo/debugify.ll
@@ -25,6 +25,33 @@
 ; RUN: opt -enable-debugify -O1 < %s | opt -O2 -o /dev/null
 ; RUN: opt -passes=debugify,mem2reg,check-debugify < %s | opt -O2 -o /dev/null
 
+;; Perform the same checks again for intrinsic debug info
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify -S -o - < %s | FileCheck %s
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify -S -o - < %s | FileCheck %s
+
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,debugify -S -o - < %s 2>&1 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-REPEAT
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,debugify -S -o - < %s 2>&1 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-REPEAT
+
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,check-debugify -S -o - < %s | \
+; RUN:   FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL"
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,check-debugify -S -o - < %s | \
+; RUN:   FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL"
+; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -passes=verify -S -o - < %s | \
+; RUN:   FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL"
+
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,strip,check-debugify -S -o - < %s 2>&1 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-WARN
+
+; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -passes=strip -S -o - < %s 2>&1 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-WARN
+
+; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -S -o - < %s 2>&1 | FileCheck %s -check-prefix=PASS
+
+; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -O1 < %s | opt -O2 -o /dev/null
+; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,mem2reg,check-debugify < %s | opt -O2 -o /dev/null
+
 ; CHECK-LABEL: define void @foo
 define void @foo() {
 ; CHECK: ret void, !dbg ![[RET1:.*]]
diff --git a/llvm/test/DebugInfo/pr37964.ll b/llvm/test/DebugInfo/pr37964.ll
index 9581f1a6b35dc..63db67d2bd37f 100644
--- a/llvm/test/DebugInfo/pr37964.ll
+++ b/llvm/test/DebugInfo/pr37964.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -disable-output -debugify-each -passes=gvn < %s 2>&1 | FileCheck %s
+; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-each -passes=gvn < %s 2>&1 | FileCheck %s
 
 ; CHECK-NOT: ERROR: Instruction with empty DebugLoc in function _Z3bazv --  {{%.*}} = phi
 ; CHECK: CheckFunctionDebugify [GVNPass]: PASS
diff --git a/llvm/test/DebugInfo/salvage-cast-debug-info.ll b/llvm/test/DebugInfo/salvage-cast-debug-info.ll
index 4676aee3d4e48..b72f717a4f2de 100644
--- a/llvm/test/DebugInfo/salvage-cast-debug-info.ll
+++ b/llvm/test/DebugInfo/salvage-cast-debug-info.ll
@@ -1,5 +1,5 @@
 ; RUN: opt %s -passes=debugify,early-cse -earlycse-debug-hash -S | FileCheck %s
-; RUN: opt %s -passes=debugify,early-cse -earlycse-debug-hash -S --try-experimental-debuginfo-iterators | FileCheck %s
+; RUN: opt --experimental-debuginfo-iterators=false %s -passes=debugify,early-cse -earlycse-debug-hash -S | FileCheck %s
 define i32 @foo(i64 %nose, i32 %more) {
 ; CHECK-LABEL: @foo(
 ; CHECK: call void @llvm.dbg.value(metadata i64 %nose, metadata [[V1:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned
diff --git a/llvm/test/DebugInfo/verify-di-preserve.ll b/llvm/test/DebugInfo/verify-di-preserve.ll
index a2f1b1dd78dc5..92fc62a0b34c4 100644
--- a/llvm/test/DebugInfo/verify-di-preserve.ll
+++ b/llvm/test/DebugInfo/verify-di-preserve.ll
@@ -1,10 +1,10 @@
 ; RUN: opt %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s
-; RUN: opt --try-experimental-debuginfo-iterators %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s
+; RUN: opt --experimental-debuginfo-iterators=false %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s
 
 ; VERIFY: CheckModuleDebugify (original debuginfo):
 
 ; RUN: opt %s -verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s
-; RUN: opt %s  --try-experimental-debuginfo-iterators -verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s
+; RUN: opt %s --experimental-debuginfo-iterators=false -verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s
 
 ; VERIFY-EACH: DeadArgumentEliminationPass
 ; VERIFY-EACH: GlobalDCEPass

From 45eabd1362527d1b3a27a90f7479865785b763ee Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq@debian.org>
Date: Wed, 17 Apr 2024 00:52:15 +0800
Subject: [PATCH 18/58] [flang,test] Add -resource-dir option to
 msvc-dependent-lib-flags.f90 (#88894)

For aarch64-windows-msvc, clang_rt.builtins is placed in the `windows`
subdirectory instead of the per-triple subdirectory, and the library is
named clang_rt.builtins-aarch64.lib.

So let's use the `-resource-dir` option to fix the test failure.

Please see the discussion on PR #87866.
---
 flang/test/Driver/msvc-dependent-lib-flags.f90 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/flang/test/Driver/msvc-dependent-lib-flags.f90 b/flang/test/Driver/msvc-dependent-lib-flags.f90
index 643dbe9e949cb..6cfc969e92b20 100644
--- a/flang/test/Driver/msvc-dependent-lib-flags.f90
+++ b/flang/test/Driver/msvc-dependent-lib-flags.f90
@@ -1,7 +1,7 @@
-! RUN: %flang -### --target=aarch64-windows-msvc %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC
-! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG
-! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL
-! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL
+! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG
 
 ! MSVC: -fc1
 ! MSVC-SAME: --dependent-lib=clang_rt.builtins.lib

From 22e6bf77ad8781810fc81fff4c447c03cdf6f419 Mon Sep 17 00:00:00 2001
From: Volodymyr Sapsai <vsapsai@apple.com>
Date: Tue, 16 Apr 2024 10:12:26 -0700
Subject: [PATCH 19/58] [unused-includes][Serialization] Remove unused
 includes. NFC. (#88790)

---
 clang/include/clang/Serialization/ModuleFileExtension.h | 1 -
 clang/lib/Serialization/ASTReader.cpp                   | 1 -
 clang/lib/Serialization/ASTWriterDecl.cpp               | 1 -
 clang/lib/Serialization/ASTWriterStmt.cpp               | 1 -
 clang/lib/Serialization/GeneratePCH.cpp                 | 1 -
 clang/lib/Serialization/GlobalModuleIndex.cpp           | 1 -
 clang/lib/Serialization/ModuleFileExtension.cpp         | 2 +-
 clang/lib/Serialization/PCHContainerOperations.cpp      | 2 --
 8 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/clang/include/clang/Serialization/ModuleFileExtension.h b/clang/include/clang/Serialization/ModuleFileExtension.h
index d7d456c8b5db8..50ce401516275 100644
--- a/clang/include/clang/Serialization/ModuleFileExtension.h
+++ b/clang/include/clang/Serialization/ModuleFileExtension.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILEEXTENSION_H
 #define LLVM_CLANG_SERIALIZATION_MODULEFILEEXTENSION_H
 
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/Support/ExtensibleRTTI.h"
 #include "llvm/Support/HashBuilder.h"
 #include "llvm/Support/MD5.h"
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index cf0726460bfca..b28df03b4a95e 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -31,7 +31,6 @@
 #include "clang/AST/ExternalASTSource.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/ODRDiagsEmitter.h"
-#include "clang/AST/ODRHash.h"
 #include "clang/AST/OpenACCClause.h"
 #include "clang/AST/OpenMPClause.h"
 #include "clang/AST/RawCommentList.h"
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index d0d49bcdf991a..c6db107e0ca42 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -16,7 +16,6 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/DeclVisitor.h"
 #include "clang/AST/Expr.h"
-#include "clang/AST/ODRHash.h"
 #include "clang/AST/OpenMPClause.h"
 #include "clang/AST/PrettyDeclStackTrace.h"
 #include "clang/Basic/SourceManager.h"
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index e3816181e2b2b..a736a7b0ef726 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -19,7 +19,6 @@
 #include "clang/AST/ExprOpenMP.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Lex/Token.h"
-#include "clang/Sema/DeclSpec.h"
 #include "clang/Serialization/ASTRecordWriter.h"
 #include "llvm/Bitstream/BitstreamWriter.h"
 using namespace clang;
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index 2fece29f34487..bed74399098d7 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -17,7 +17,6 @@
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/SemaConsumer.h"
-#include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTWriter.h"
 #include "llvm/Bitstream/BitstreamWriter.h"
 
diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp
index 8ff10f6a8621e..f09ceb8d31620 100644
--- a/clang/lib/Serialization/GlobalModuleIndex.cpp
+++ b/clang/lib/Serialization/GlobalModuleIndex.cpp
@@ -13,7 +13,6 @@
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "ASTReaderInternals.h"
 #include "clang/Basic/FileManager.h"
-#include "clang/Lex/HeaderSearch.h"
 #include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/ModuleFile.h"
 #include "clang/Serialization/PCHContainerOperations.h"
diff --git a/clang/lib/Serialization/ModuleFileExtension.cpp b/clang/lib/Serialization/ModuleFileExtension.cpp
index 95fff41e0d7a8..729529b5fca18 100644
--- a/clang/lib/Serialization/ModuleFileExtension.cpp
+++ b/clang/lib/Serialization/ModuleFileExtension.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Serialization/ModuleFileExtension.h"
-#include "llvm/ADT/Hashing.h"
+
 using namespace clang;
 
 char ModuleFileExtension::ID = 0;
diff --git a/clang/lib/Serialization/PCHContainerOperations.cpp b/clang/lib/Serialization/PCHContainerOperations.cpp
index 56ca3394385b4..4aedb7debcff2 100644
--- a/clang/lib/Serialization/PCHContainerOperations.cpp
+++ b/clang/lib/Serialization/PCHContainerOperations.cpp
@@ -12,8 +12,6 @@
 
 #include "clang/Serialization/PCHContainerOperations.h"
 #include "clang/AST/ASTConsumer.h"
-#include "clang/Lex/ModuleLoader.h"
-#include "llvm/Bitstream/BitstreamReader.h"
 #include "llvm/Support/raw_ostream.h"
 #include <utility>
 

From b566810add5b7c5695bdd2c39710b78af9dc83ba Mon Sep 17 00:00:00 2001
From: Volodymyr Sapsai <vsapsai@apple.com>
Date: Tue, 16 Apr 2024 10:13:15 -0700
Subject: [PATCH 20/58] [unused-includes] PCHContainerOperations uses
 MemoryBufferRef, not MemoryBuffer. NFC. (#88794)

---
 clang/include/clang/Serialization/PCHContainerOperations.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Serialization/PCHContainerOperations.h b/clang/include/clang/Serialization/PCHContainerOperations.h
index ddfddf2dafadf..c9a7e334ce6eb 100644
--- a/clang/include/clang/Serialization/PCHContainerOperations.h
+++ b/clang/include/clang/Serialization/PCHContainerOperations.h
@@ -12,7 +12,7 @@
 #include "clang/Basic/Module.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
 #include <memory>
 
 namespace llvm {

From 454d4496970f665200b5b300578894d78405b6ca Mon Sep 17 00:00:00 2001
From: Xing Xue <xingxue@outlook.com>
Date: Tue, 16 Apr 2024 13:13:49 -0400
Subject: [PATCH 21/58] [OpenMP] Use a memory fence before incrementing the
 dispatch buffer index (#87995)

This patch uses a memory fence in function `__kmp_dispatch_next()` to
flush pending memory write invalidates before incrementing the
`volatile` variable `buffer_index` to fix intermittent time-outs of
OpenMP runtime LIT test cases `env/kmp_set_dispatch_buf.c` and
`worksharing/for/kmp_set_dispatch_buf.c`, noting that the same is needed
for incrementing `buffer_index` in function `__kmpc_next_section()`
(line 2600 of `kmp_dispatch.cpp`).
---
 openmp/runtime/src/kmp_dispatch.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index ac85b2b3f2fcd..fc33376511817 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -2397,6 +2397,8 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
           sh->u.s.ordered_iteration = 0;
         }
 
+        KMP_MB(); /* Flush all pending memory write invalidates.  */
+
         sh->buffer_index += __kmp_dispatch_num_buffers;
         KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                        gtid, sh->buffer_index));

From 22bba85d82637d0446928ff6ff41f98583f3d3b2 Mon Sep 17 00:00:00 2001
From: Xing Xue <xingxue@outlook.com>
Date: Tue, 16 Apr 2024 13:14:29 -0400
Subject: [PATCH 22/58] [OpenMP][test][AIX] Make 64 the max number of threads
 for capacity tests in AIX 32-bit (#88739)

This patch makes 64 the max number of threads for 2 capacity tests in
AIX 32-bit mode rather than `XFAIL`ing them.
---
 .../capacity_mix_threads.cpp                  | 20 +++++++++++++------
 .../hidden_helper_task/capacity_nthreads.cpp  | 20 +++++++++++++------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
index 3f2ceef0c4add..36825dbebafb5 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
@@ -1,7 +1,4 @@
 // RUN: %libomp-cxx-compile-and-run
-//
-// AIX runs out of resource in 32-bit with 4*omp_get_max_threads() threads.
-// XFAIL: aix && ppc
 
 #include <omp.h>
 
@@ -11,6 +8,12 @@
 #include <thread>
 #include <vector>
 
+// AIX runs out of resource in 32-bit if 4*omp_get_max_threads() is more
+// than 64 threads with the default stack size.
+#if defined(_AIX) && !__LP64__
+#define MAX_THREADS 64
+#endif
+
 void dummy_root() {
   // omp_get_max_threads() will do middle initialization
   int nthreads = omp_get_max_threads();
@@ -18,9 +21,14 @@ void dummy_root() {
 }
 
 int main(int argc, char *argv[]) {
-  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
-                                  4 * omp_get_num_procs()),
-                         std::numeric_limits<int>::max());
+  int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                            4 * omp_get_num_procs()),
+                   std::numeric_limits<int>::max());
+
+#if defined(_AIX) && !__LP64__
+  if (N > MAX_THREADS)
+    N = MAX_THREADS;
+#endif
 
   std::vector<int> data(N);
 
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
index f7405d00255cb..1cceee95e704b 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
@@ -1,7 +1,4 @@
 // RUN: %libomp-cxx-compile-and-run
-//
-// AIX runs out of resource in 32-bit with 4*omp_get_max_threads() threads.
-// XFAIL: aix && ppc
 
 #include <omp.h>
 
@@ -10,10 +7,21 @@
 #include <limits>
 #include <vector>
 
+// AIX runs out of resources in 32-bit if 4*omp_get_max_threads() is more
+// than 64 threads with the default stack size.
+#if defined(_AIX) && !__LP64__
+#define MAX_THREADS 64
+#endif
+
 int main(int argc, char *argv[]) {
-  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
-                                  4 * omp_get_num_procs()),
-                         std::numeric_limits<int>::max());
+  int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                            4 * omp_get_num_procs()),
+                   std::numeric_limits<int>::max());
+
+#if defined(_AIX) && !__LP64__
+  if (N > MAX_THREADS)
+    N = MAX_THREADS;
+#endif
 
   std::vector<int> data(N);
 

From 8137bd9e03d636a27701a85b6efe899f9571cac5 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Tue, 16 Apr 2024 10:16:48 -0700
Subject: [PATCH 23/58] [memprof] Use CSId to construct MemProfRecord (#88362)

We are in the process of referring to call stacks with CallStackId in
IndexedMemProfRecord and IndexedAllocationInfo instead of holding call
stacks inline (both in memory and the serialized format).  Doing so
deduplicates call stacks and reduces the MemProf profile file size.

Before we can eliminate the two fields holding call stacks inline:

- IndexedAllocationInfo::CallStack
- IndexedMemProfRecord::CallSites

we need to eliminate all the read operations on them.

This patch is a step in that direction.  Specifically, we eliminate
the read operations in the context of MemProfReader and
RawMemProfReader.  A subsequent patch will eliminate the read
operations during the serialization.
---
 llvm/include/llvm/ProfileData/MemProf.h       |  8 ++
 llvm/include/llvm/ProfileData/MemProfReader.h | 20 ++++-
 llvm/lib/ProfileData/MemProf.cpp              | 18 ++++
 llvm/lib/ProfileData/MemProfReader.cpp        | 26 ++++++
 llvm/unittests/ProfileData/MemProfTest.cpp    | 85 +++++++++++++++++++
 5 files changed, 153 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 0431c182276ec..3520034fb1c94 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -16,6 +16,8 @@
 namespace llvm {
 namespace memprof {
 
+struct MemProfRecord;
+
 // The versions of the indexed MemProf format
 enum IndexedVersion : uint64_t {
   // Version 0: This version didn't have a version field.
@@ -392,6 +394,12 @@ struct IndexedMemProfRecord {
                                           const unsigned char *Buffer,
                                           IndexedVersion Version);
 
+  // Convert IndexedMemProfRecord to MemProfRecord.  Callback is used to
+  // translate CallStackId to call stacks with frames inline.
+  MemProfRecord toMemProfRecord(
+      std::function<const llvm::SmallVector<Frame>(const CallStackId)> Callback)
+      const;
+
   // Returns the GUID for the function name after canonicalization. For
   // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
   // mapped to functions using this GUID.
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 89f49a20a6089..1f84fefad03e3 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -70,8 +70,20 @@ class MemProfReader {
       Callback =
           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
 
+    auto CallStackCallback = [&](CallStackId CSId) {
+      llvm::SmallVector<Frame> CallStack;
+      auto Iter = CSIdToCallStack.find(CSId);
+      assert(Iter != CSIdToCallStack.end());
+      for (FrameId Id : Iter->second)
+        CallStack.push_back(Callback(Id));
+      return CallStack;
+    };
+
     const IndexedMemProfRecord &IndexedRecord = Iter->second;
-    GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)};
+    GuidRecord = {
+        Iter->first,
+        IndexedRecord.toMemProfRecord(CallStackCallback),
+    };
     Iter++;
     return Error::success();
   }
@@ -84,9 +96,7 @@ class MemProfReader {
   // Initialize the MemProfReader with the frame mappings and profile contents.
   MemProfReader(
       llvm::DenseMap<FrameId, Frame> FrameIdMap,
-      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
-      : IdToFrame(std::move(FrameIdMap)),
-        FunctionProfileData(std::move(ProfData)) {}
+      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData);
 
 protected:
   // A helper method to extract the frame from the IdToFrame map.
@@ -97,6 +107,8 @@ class MemProfReader {
   }
   // A mapping from FrameId (a hash of the contents) to the frame.
   llvm::DenseMap<FrameId, Frame> IdToFrame;
+  // A mapping from CallStackId to the call stack.
+  llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdToCallStack;
   // A mapping from function GUID, hash of the canonical function symbol to the
   // memprof profile data for that function, i.e allocation and callsite info.
   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 97414505f1c13..1ca0a02d3cbde 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -224,6 +224,24 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
   llvm_unreachable("unsupported MemProf version");
 }
 
+MemProfRecord IndexedMemProfRecord::toMemProfRecord(
+    std::function<const llvm::SmallVector<Frame>(const CallStackId)> Callback)
+    const {
+  MemProfRecord Record;
+
+  for (const memprof::IndexedAllocationInfo &IndexedAI : AllocSites) {
+    memprof::AllocationInfo AI;
+    AI.Info = IndexedAI.Info;
+    AI.CallStack = Callback(IndexedAI.CSId);
+    Record.AllocSites.push_back(AI);
+  }
+
+  for (memprof::CallStackId CSId : CallSiteIds)
+    Record.CallSites.push_back(Callback(CSId));
+
+  return Record;
+}
+
 GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
   // Canonicalize the function name to drop suffixes such as ".llvm.". Note
   // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 580867a9083fd..91556f036c777 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -183,6 +183,28 @@ std::string getBuildIdString(const SegmentEntry &Entry) {
 }
 } // namespace
 
+MemProfReader::MemProfReader(
+    llvm::DenseMap<FrameId, Frame> FrameIdMap,
+    llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
+    : IdToFrame(std::move(FrameIdMap)),
+      FunctionProfileData(std::move(ProfData)) {
+  // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
+  // while storing CallStack in CSIdToCallStack.
+  for (auto &KV : FunctionProfileData) {
+    IndexedMemProfRecord &Record = KV.second;
+    for (auto &AS : Record.AllocSites) {
+      CallStackId CSId = hashCallStack(AS.CallStack);
+      AS.CSId = CSId;
+      CSIdToCallStack.insert({CSId, AS.CallStack});
+    }
+    for (auto &CS : Record.CallSites) {
+      CallStackId CSId = hashCallStack(CS);
+      Record.CallSiteIds.push_back(CSId);
+      CSIdToCallStack.insert({CSId, CS});
+    }
+  }
+}
+
 Expected<std::unique_ptr<RawMemProfReader>>
 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                          bool KeepName) {
@@ -445,6 +467,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     }
 
     CallStackId CSId = hashCallStack(Callstack);
+    CSIdToCallStack.insert({CSId, Callstack});
 
     // We attach the memprof record to each function bottom-up including the
     // first non-inline frame.
@@ -467,7 +490,10 @@ Error RawMemProfReader::mapRawProfileToRecords() {
     auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
     IndexedMemProfRecord &Record = Result.first->second;
     for (LocationPtr Loc : Locs) {
+      CallStackId CSId = hashCallStack(*Loc);
+      CSIdToCallStack.insert({CSId, *Loc});
       Record.CallSites.push_back(*Loc);
+      Record.CallSiteIds.push_back(CSId);
     }
   }
 
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 9cf307472d656..ab9227e9df881 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -21,9 +21,11 @@ using ::llvm::DILineInfo;
 using ::llvm::DILineInfoSpecifier;
 using ::llvm::DILocal;
 using ::llvm::StringRef;
+using ::llvm::memprof::CallStackId;
 using ::llvm::memprof::CallStackMap;
 using ::llvm::memprof::Frame;
 using ::llvm::memprof::FrameId;
+using ::llvm::memprof::IndexedAllocationInfo;
 using ::llvm::memprof::IndexedMemProfRecord;
 using ::llvm::memprof::MemInfoBlock;
 using ::llvm::memprof::MemProfReader;
@@ -36,6 +38,7 @@ using ::llvm::memprof::SegmentEntry;
 using ::llvm::object::SectionedAddress;
 using ::llvm::symbolize::SymbolizableModule;
 using ::testing::Return;
+using ::testing::SizeIs;
 
 class MockSymbolizer : public SymbolizableModule {
 public:
@@ -432,4 +435,86 @@ TEST(MemProf, BaseMemProfReader) {
   EXPECT_THAT(Records[0].AllocSites[0].CallStack[1],
               FrameContains("bar", 10U, 2U, false));
 }
+
+TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
+  // Verify that MemProfRecord can be constructed from IndexedMemProfRecord with
+  // CallStackIds only.
+
+  llvm::DenseMap<FrameId, Frame> FrameIdMap;
+  Frame F1(1, 0, 0, false);
+  Frame F2(2, 0, 0, false);
+  Frame F3(3, 0, 0, false);
+  Frame F4(4, 0, 0, false);
+  FrameIdMap.insert({F1.hash(), F1});
+  FrameIdMap.insert({F2.hash(), F2});
+  FrameIdMap.insert({F3.hash(), F3});
+  FrameIdMap.insert({F4.hash(), F4});
+
+  llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CallStackIdMap;
+  llvm::SmallVector<FrameId> CS1 = {F1.hash(), F2.hash()};
+  llvm::SmallVector<FrameId> CS2 = {F1.hash(), F3.hash()};
+  llvm::SmallVector<FrameId> CS3 = {F2.hash(), F3.hash()};
+  llvm::SmallVector<FrameId> CS4 = {F2.hash(), F4.hash()};
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS1), CS1});
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS2), CS2});
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS3), CS3});
+  CallStackIdMap.insert({llvm::memprof::hashCallStack(CS4), CS4});
+
+  IndexedMemProfRecord IndexedRecord;
+  IndexedAllocationInfo AI;
+  AI.CSId = llvm::memprof::hashCallStack(CS1);
+  IndexedRecord.AllocSites.push_back(AI);
+  AI.CSId = llvm::memprof::hashCallStack(CS2);
+  IndexedRecord.AllocSites.push_back(AI);
+  IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS3));
+  IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS4));
+
+  bool CSIdMissing = false;
+  bool FrameIdMissing = false;
+
+  auto Callback = [&](CallStackId CSId) -> llvm::SmallVector<Frame> {
+    llvm::SmallVector<Frame> CallStack;
+    llvm::SmallVector<FrameId> FrameIds;
+
+    auto Iter = CallStackIdMap.find(CSId);
+    if (Iter == CallStackIdMap.end())
+      CSIdMissing = true;
+    else
+      FrameIds = Iter->second;
+
+    for (FrameId Id : FrameIds) {
+      Frame F(0, 0, 0, false);
+      auto Iter = FrameIdMap.find(Id);
+      if (Iter == FrameIdMap.end())
+        FrameIdMissing = true;
+      else
+        F = Iter->second;
+      CallStack.push_back(F);
+    }
+
+    return CallStack;
+  };
+
+  MemProfRecord Record = IndexedRecord.toMemProfRecord(Callback);
+
+  // Make sure that all lookups are successful.
+  ASSERT_FALSE(CSIdMissing);
+  ASSERT_FALSE(FrameIdMissing);
+
+  // Verify the contents of Record.
+  ASSERT_THAT(Record.AllocSites, SizeIs(2));
+  ASSERT_THAT(Record.AllocSites[0].CallStack, SizeIs(2));
+  EXPECT_EQ(Record.AllocSites[0].CallStack[0].hash(), F1.hash());
+  EXPECT_EQ(Record.AllocSites[0].CallStack[1].hash(), F2.hash());
+  ASSERT_THAT(Record.AllocSites[1].CallStack, SizeIs(2));
+  EXPECT_EQ(Record.AllocSites[1].CallStack[0].hash(), F1.hash());
+  EXPECT_EQ(Record.AllocSites[1].CallStack[1].hash(), F3.hash());
+  ASSERT_THAT(Record.CallSites, SizeIs(2));
+  ASSERT_THAT(Record.CallSites[0], SizeIs(2));
+  EXPECT_EQ(Record.CallSites[0][0].hash(), F2.hash());
+  EXPECT_EQ(Record.CallSites[0][1].hash(), F3.hash());
+  ASSERT_THAT(Record.CallSites[1], SizeIs(2));
+  EXPECT_EQ(Record.CallSites[1][0].hash(), F2.hash());
+  EXPECT_EQ(Record.CallSites[1][1].hash(), F4.hash());
+}
 } // namespace

From 8cd8ebe153391993a3668d2ac8d2994d9491f3ef Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Tue, 16 Apr 2024 10:25:02 -0700
Subject: [PATCH 24/58] [MLGO] Use double comparison facilities for reg alloc
 scoring tests (#88862)

This patch switches from using direct equality (ASSERT_EQ) to the
floating point comparison facilities (ASSERT_DOUBLE_EQ) within google
test to avoid weird floating point problems. There is at least one
downstream that maintains a patch for issues cropping up from the direct
equality.


https://gitlab.alpinelinux.org/alpine/aports/-/blob/master/main/llvm17/allocscore.patch
---
 llvm/unittests/CodeGen/RegAllocScoreTest.cpp | 27 ++++++++++----------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/llvm/unittests/CodeGen/RegAllocScoreTest.cpp b/llvm/unittests/CodeGen/RegAllocScoreTest.cpp
index ff7146eaf9439..eae517f9d01cf 100644
--- a/llvm/unittests/CodeGen/RegAllocScoreTest.cpp
+++ b/llvm/unittests/CodeGen/RegAllocScoreTest.cpp
@@ -166,19 +166,20 @@ TEST(RegAllocScoreTest, Counts) {
   ASSERT_EQ(MF->size(), 2U);
   const auto TotalScore =
       llvm::calculateRegAllocScore(*MF, MBBFreqMock, IsRemat);
-  ASSERT_EQ(Freq1, TotalScore.copyCounts());
-  ASSERT_EQ(2.0 * Freq1 + Freq2, TotalScore.loadCounts());
-  ASSERT_EQ(Freq1 + Freq2, TotalScore.storeCounts());
-  ASSERT_EQ(Freq2, TotalScore.loadStoreCounts());
-  ASSERT_EQ(Freq1, TotalScore.cheapRematCounts());
-  ASSERT_EQ(Freq2, TotalScore.expensiveRematCounts());
-  ASSERT_EQ(TotalScore.getScore(),
-            TotalScore.copyCounts() * CopyWeight +
-                TotalScore.loadCounts() * LoadWeight +
-                TotalScore.storeCounts() * StoreWeight +
-                TotalScore.loadStoreCounts() * (LoadWeight + StoreWeight) +
-                TotalScore.cheapRematCounts() * CheapRematWeight +
-                TotalScore.expensiveRematCounts() * ExpensiveRematWeight
+  ASSERT_DOUBLE_EQ(Freq1, TotalScore.copyCounts());
+  ASSERT_DOUBLE_EQ(2.0 * Freq1 + Freq2, TotalScore.loadCounts());
+  ASSERT_DOUBLE_EQ(Freq1 + Freq2, TotalScore.storeCounts());
+  ASSERT_DOUBLE_EQ(Freq2, TotalScore.loadStoreCounts());
+  ASSERT_DOUBLE_EQ(Freq1, TotalScore.cheapRematCounts());
+  ASSERT_DOUBLE_EQ(Freq2, TotalScore.expensiveRematCounts());
+  ASSERT_DOUBLE_EQ(
+      TotalScore.getScore(),
+      TotalScore.copyCounts() * CopyWeight +
+          TotalScore.loadCounts() * LoadWeight +
+          TotalScore.storeCounts() * StoreWeight +
+          TotalScore.loadStoreCounts() * (LoadWeight + StoreWeight) +
+          TotalScore.cheapRematCounts() * CheapRematWeight +
+          TotalScore.expensiveRematCounts() * ExpensiveRematWeight
 
   );
 }

From 75054525ae58f26c86e418382164540760871186 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 16 Apr 2024 18:42:23 +0200
Subject: [PATCH 25/58] [clang][Interp] Implement VectorSplat casts

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 29 ++++++++++++++++++++++++
 clang/lib/AST/Interp/ByteCodeExprGen.h   |  9 +++++++-
 clang/test/AST/Interp/vectors.cpp        | 17 ++++++++++++--
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 00c4a9f161304..6b4b51aac41e8 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -398,6 +398,35 @@ bool ByteCodeExprGen<Emitter>::VisitCastExpr(const CastExpr *CE) {
     return true;
   }
 
+  case CK_VectorSplat: {
+    assert(!classify(CE->getType()));
+    assert(classify(SubExpr->getType()));
+    assert(CE->getType()->isVectorType());
+
+    if (DiscardResult)
+      return this->discard(SubExpr);
+
+    assert(Initializing); // FIXME: Not always correct.
+    const auto *VT = CE->getType()->getAs<VectorType>();
+    PrimType ElemT = classifyPrim(SubExpr);
+    unsigned ElemOffset = allocateLocalPrimitive(
+        SubExpr, ElemT, /*IsConst=*/true, /*IsExtended=*/false);
+
+    if (!this->visit(SubExpr))
+      return false;
+    if (!this->emitSetLocal(ElemT, ElemOffset, CE))
+      return false;
+
+    for (unsigned I = 0; I != VT->getNumElements(); ++I) {
+      if (!this->emitGetLocal(ElemT, ElemOffset, CE))
+        return false;
+      if (!this->emitInitElem(ElemT, I, CE))
+        return false;
+    }
+
+    return true;
+  }
+
   case CK_ToVoid:
     return discard(SubExpr);
 
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index db0d73ce23f7c..7e9dc8631fc0d 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -148,13 +148,20 @@ class ByteCodeExprGen : public ConstStmtVisitor<ByteCodeExprGen<Emitter>, bool>,
     return Ctx.classify(Ty);
   }
 
-  /// Classifies a known primitive type
+  /// Classifies a known primitive type.
   PrimType classifyPrim(QualType Ty) const {
     if (auto T = classify(Ty)) {
       return *T;
     }
     llvm_unreachable("not a primitive type");
   }
+  /// Classifies a known primitive expression.
+  PrimType classifyPrim(const Expr *E) const {
+    if (auto T = classify(E))
+      return *T;
+    llvm_unreachable("not a primitive type");
+  }
+
   /// Evaluates an expression and places the result on the stack. If the
   /// expression is of composite type, a local variable will be created
   /// and a pointer to said variable will be placed on the stack.
diff --git a/clang/test/AST/Interp/vectors.cpp b/clang/test/AST/Interp/vectors.cpp
index 6c5d916f51f56..5c4694f122d81 100644
--- a/clang/test/AST/Interp/vectors.cpp
+++ b/clang/test/AST/Interp/vectors.cpp
@@ -1,10 +1,23 @@
 // RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s
 // RUN: %clang_cc1 -verify=ref,both %s
 
-// ref-no-diagnostics
-
 typedef int __attribute__((vector_size(16))) VI4;
 constexpr VI4 A = {1,2,3,4};
+static_assert(A[0] == 1, ""); // ref-error {{not an integral constant expression}}
+static_assert(A[1] == 2, ""); // ref-error {{not an integral constant expression}}
+static_assert(A[2] == 3, ""); // ref-error {{not an integral constant expression}}
+static_assert(A[3] == 4, ""); // ref-error {{not an integral constant expression}}
+
+/// VectorSplat casts
+typedef __attribute__(( ext_vector_type(4) )) float float4;
+constexpr float4 vec4_0 = (float4)0.5f;
+static_assert(vec4_0[0] == 0.5, ""); // ref-error {{not an integral constant expression}}
+static_assert(vec4_0[1] == 0.5, ""); // ref-error {{not an integral constant expression}}
+static_assert(vec4_0[2] == 0.5, ""); // ref-error {{not an integral constant expression}}
+static_assert(vec4_0[3] == 0.5, ""); // ref-error {{not an integral constant expression}}
+constexpr int vec4_0_discarded = ((float4)12.0f, 0);
+
+
 
 /// From constant-expression-cxx11.cpp
 namespace Vector {

From 184ba038ac1d444980b3e554b0057f3f30c516ab Mon Sep 17 00:00:00 2001
From: Philip Reames <preames@rivosinc.com>
Date: Tue, 16 Apr 2024 10:46:27 -0700
Subject: [PATCH 26/58] [RISCV] Avoid matching 3/5/9 * 2^N as 2^N + 2/4/8 (e.g.
 24) (#88937)

The former is better as a zero extend can be folded into the sll,
whereas the latter currently produces a separate zext.w due to bad
interactions with other combines.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  6 ++++++
 llvm/test/CodeGen/RISCV/addimm-mulimm.ll    |  9 ++++-----
 llvm/test/CodeGen/RISCV/rv64zba.ll          | 22 +++++++++++++++++++++
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 765838aafb58d..de2ad639f0d6c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13416,6 +13416,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     return SDValue();
   uint64_t MulAmt = CNode->getZExtValue();
 
+  // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
+  // Matched in tablegen, avoid perturbing patterns.
+  for (uint64_t Divisor : {3, 5, 9})
+    if (MulAmt % Divisor == 0 && isPowerOf2_64(MulAmt / Divisor))
+      return SDValue();
+
   // If this is a power 2 + 2/4/8, we can use a shift followed by a single
   // shXadd. First check if this a sum of two power of 2s because that's
   // easy. Then count how many zeros are up to the first bit.
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index 10103f071462c..48fa69e104565 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -551,9 +551,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
 ; RV32IMB-NEXT:    sh3add a1, a1, a2
 ; RV32IMB-NEXT:    sh1add a0, a0, a0
 ; RV32IMB-NEXT:    slli a2, a0, 3
-; RV32IMB-NEXT:    li a3, 1
-; RV32IMB-NEXT:    slli a3, a3, 11
-; RV32IMB-NEXT:    sh3add a0, a0, a3
+; RV32IMB-NEXT:    addi a0, a2, 2047
+; RV32IMB-NEXT:    addi a0, a0, 1
 ; RV32IMB-NEXT:    sltu a2, a0, a2
 ; RV32IMB-NEXT:    add a1, a1, a2
 ; RV32IMB-NEXT:    ret
@@ -562,8 +561,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 86
 ; RV64IMB-NEXT:    sh1add a0, a0, a0
-; RV64IMB-NEXT:    slli a0, a0, 3
-; RV64IMB-NEXT:    addi a0, a0, -16
+; RV64IMB-NEXT:    li a1, -16
+; RV64IMB-NEXT:    sh3add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i64 %x, 24
   %tmp1 = add i64 %tmp0, 2048
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index a84b9e5e7962f..c3c757656be93 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -2490,3 +2490,25 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
   %gep2 = getelementptr i64, ptr %gep1, i64 %a1
   ret ptr %gep2
 }
+
+define i64 @regression(i32 signext %x, i32 signext %y) {
+; RV64I-LABEL: regression:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    li a1, 3
+; RV64I-NEXT:    slli a1, a1, 35
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: regression:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    subw a0, a0, a1
+; RV64ZBA-NEXT:    slli.uw a0, a0, 3
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %sub = sub i32 %x, %y
+  %ext = zext i32 %sub to i64
+  %res = mul nuw nsw i64 %ext, 24
+  ret i64 %res
+}

From 4082a7554521572a65a5a0008c4661a534df659d Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Tue, 16 Apr 2024 13:48:13 -0400
Subject: [PATCH 27/58] Improve stack usage to increase recursive
 initialization depth (#88546)

We were crashing due to stack exhaustion on rather reasonable C++
template code. After some investigation, I found that we have a
stack-allocated object that was huge: `InitializationSequence` was 7016
bytes. This caused an overflow with deep call stacks in initialization
code.

With these changes, `InitializationSequence` is now 248 bytes.

With the original code, testing RelWithDebInfo on Windows 10, all the
tests in SemaCXX took about 6s 800ms. The max template depth I could
reach on my machine using the code in the issue was 708. After that, I
would get `-Wstack-exhausted` warnings until crashing at 976
instantiations.

With these changes on the same machine, all the tests in SemaCXX took
about 6s 500ms. The max template depth I could reach was 1492. After
that, I would get `-Wstack-exhausted` warnings until crashing at 2898
instantiations.

This improves the behavior of #88330 but there's still an outstanding
question of why we run out of stack space and crash in some
circumstances before we're able to issue a diagnostic about stack space
exhaustion.
---
 clang/docs/ReleaseNotes.rst               |  6 ++
 clang/include/clang/Sema/Initialization.h |  6 +-
 clang/include/clang/Sema/Overload.h       | 70 ++++++-----------------
 clang/lib/Sema/SemaInit.cpp               | 26 +++++----
 clang/lib/Sema/SemaOverload.cpp           | 21 +++----
 5 files changed, 54 insertions(+), 75 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d8ec8bcb8df53..e6c345a2f5c0f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -203,6 +203,12 @@ Non-comprehensive list of changes in this release
 - ``__typeof_unqual__`` is available in all C modes as an extension, which behaves
   like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``.
 
+- Improved stack usage with C++ initialization code. This allows significantly
+  more levels of recursive initialization before reaching stack exhaustion
+  limits. This will positively impact recursive template instantiation code,
+  but should also reduce memory overhead for initializations in general.
+  Fixes #GH88330
+
 New Compiler Flags
 ------------------
 - ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and
diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h
index 2072cd8d1c3ef..1ceacf0f49f56 100644
--- a/clang/include/clang/Sema/Initialization.h
+++ b/clang/include/clang/Sema/Initialization.h
@@ -1134,7 +1134,7 @@ class InitializationSequence {
   OverloadingResult FailedOverloadResult;
 
   /// The candidate set created when initialization failed.
-  OverloadCandidateSet FailedCandidateSet;
+  std::unique_ptr<OverloadCandidateSet> FailedCandidateSet;
 
   /// The incomplete type that caused a failure.
   QualType FailedIncompleteType;
@@ -1403,7 +1403,9 @@ class InitializationSequence {
   /// Retrieve a reference to the candidate set when overload
   /// resolution fails.
   OverloadCandidateSet &getFailedCandidateSet() {
-    return FailedCandidateSet;
+    assert(FailedCandidateSet &&
+           "this should have been allocated in the constructor!");
+    return *FailedCandidateSet;
   }
 
   /// Get the overloading result, for when the initialization
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index 76311b00d2fc5..e6f88bbf7c4f4 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -37,6 +37,7 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
+#include <memory>
 #include <utility>
 
 namespace clang {
@@ -874,7 +875,8 @@ class Sema;
     ConversionFixItGenerator Fix;
 
     /// Viable - True to indicate that this overload candidate is viable.
-    bool Viable : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned Viable : 1;
 
     /// Whether this candidate is the best viable function, or tied for being
     /// the best viable function.
@@ -883,12 +885,14 @@ class Sema;
     /// was part of the ambiguity kernel: the minimal non-empty set of viable
     /// candidates such that all elements of the ambiguity kernel are better
     /// than all viable candidates not in the ambiguity kernel.
-    bool Best : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned Best : 1;
 
     /// IsSurrogate - True to indicate that this candidate is a
     /// surrogate for a conversion to a function pointer or reference
     /// (C++ [over.call.object]).
-    bool IsSurrogate : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned IsSurrogate : 1;
 
     /// IgnoreObjectArgument - True to indicate that the first
     /// argument's conversion, which for this function represents the
@@ -897,18 +901,20 @@ class Sema;
     /// implicit object argument is just a placeholder) or a
     /// non-static member function when the call doesn't have an
     /// object argument.
-    bool IgnoreObjectArgument : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned IgnoreObjectArgument : 1;
 
     /// True if the candidate was found using ADL.
-    CallExpr::ADLCallKind IsADLCandidate : 1;
+    LLVM_PREFERRED_TYPE(CallExpr::ADLCallKind)
+    unsigned IsADLCandidate : 1;
 
     /// Whether this is a rewritten candidate, and if so, of what kind?
     LLVM_PREFERRED_TYPE(OverloadCandidateRewriteKind)
     unsigned RewriteKind : 2;
 
     /// FailureKind - The reason why this candidate is not viable.
-    /// Actually an OverloadFailureKind.
-    unsigned char FailureKind;
+    LLVM_PREFERRED_TYPE(OverloadFailureKind)
+    unsigned FailureKind : 5;
 
     /// The number of call arguments that were explicitly provided,
     /// to be used while performing partial ordering of function templates.
@@ -972,7 +978,9 @@ class Sema;
   private:
     friend class OverloadCandidateSet;
     OverloadCandidate()
-        : IsSurrogate(false), IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {}
+        : IsSurrogate(false),
+          IsADLCandidate(static_cast<unsigned>(CallExpr::NotADL)),
+          RewriteKind(CRK_None) {}
   };
 
   /// OverloadCandidateSet - A set of overload candidates, used in C++
@@ -1070,51 +1078,16 @@ class Sema;
     };
 
   private:
-    SmallVector<OverloadCandidate, 16> Candidates;
-    llvm::SmallPtrSet<uintptr_t, 16> Functions;
-
-    // Allocator for ConversionSequenceLists. We store the first few of these
-    // inline to avoid allocation for small sets.
-    llvm::BumpPtrAllocator SlabAllocator;
+    SmallVector<OverloadCandidate, 4> Candidates;
+    llvm::SmallPtrSet<uintptr_t, 4> Functions;
 
     SourceLocation Loc;
     CandidateSetKind Kind;
     OperatorRewriteInfo RewriteInfo;
 
-    constexpr static unsigned NumInlineBytes =
-        24 * sizeof(ImplicitConversionSequence);
-    unsigned NumInlineBytesUsed = 0;
-    alignas(void *) char InlineSpace[NumInlineBytes];
-
     // Address space of the object being constructed.
     LangAS DestAS = LangAS::Default;
 
-    /// If we have space, allocates from inline storage. Otherwise, allocates
-    /// from the slab allocator.
-    /// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
-    /// instead.
-    /// FIXME: Now that this only allocates ImplicitConversionSequences, do we
-    /// want to un-generalize this?
-    template <typename T>
-    T *slabAllocate(unsigned N) {
-      // It's simpler if this doesn't need to consider alignment.
-      static_assert(alignof(T) == alignof(void *),
-                    "Only works for pointer-aligned types.");
-      static_assert(std::is_trivial<T>::value ||
-                        std::is_same<ImplicitConversionSequence, T>::value,
-                    "Add destruction logic to OverloadCandidateSet::clear().");
-
-      unsigned NBytes = sizeof(T) * N;
-      if (NBytes > NumInlineBytes - NumInlineBytesUsed)
-        return SlabAllocator.Allocate<T>(N);
-      char *FreeSpaceStart = InlineSpace + NumInlineBytesUsed;
-      assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 &&
-             "Misaligned storage!");
-
-      NumInlineBytesUsed += NBytes;
-      return reinterpret_cast<T *>(FreeSpaceStart);
-    }
-
     void destroyCandidates();
 
   public:
@@ -1163,12 +1136,7 @@ class Sema;
     ConversionSequenceList
     allocateConversionSequences(unsigned NumConversions) {
       ImplicitConversionSequence *Conversions =
-          slabAllocate<ImplicitConversionSequence>(NumConversions);
-
-      // Construct the new objects.
-      for (unsigned I = 0; I != NumConversions; ++I)
-        new (&Conversions[I]) ImplicitConversionSequence();
-
+          new ImplicitConversionSequence[NumConversions];
       return ConversionSequenceList(Conversions, NumConversions);
     }
 
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index fb7a80ab02846..791c0b6e6df23 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -6114,7 +6114,8 @@ InitializationSequence::InitializationSequence(
     Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind,
     MultiExprArg Args, bool TopLevelOfInitList, bool TreatUnavailableAsInvalid)
     : FailedOverloadResult(OR_Success),
-      FailedCandidateSet(Kind.getLocation(), OverloadCandidateSet::CSK_Normal) {
+      FailedCandidateSet(new OverloadCandidateSet(
+          Kind.getLocation(), OverloadCandidateSet::CSK_Normal)) {
   InitializeFrom(S, Entity, Kind, Args, TopLevelOfInitList,
                  TreatUnavailableAsInvalid);
 }
@@ -9735,7 +9736,7 @@ bool InitializationSequence::Diagnose(Sema &S,
     switch (FailedOverloadResult) {
     case OR_Ambiguous:
 
-      FailedCandidateSet.NoteCandidates(
+      FailedCandidateSet->NoteCandidates(
           PartialDiagnosticAt(
               Kind.getLocation(),
               Failure == FK_UserConversionOverloadFailed
@@ -9749,7 +9750,8 @@ bool InitializationSequence::Diagnose(Sema &S,
       break;
 
     case OR_No_Viable_Function: {
-      auto Cands = FailedCandidateSet.CompleteCandidates(S, OCD_AllCandidates, Args);
+      auto Cands =
+          FailedCandidateSet->CompleteCandidates(S, OCD_AllCandidates, Args);
       if (!S.RequireCompleteType(Kind.getLocation(),
                                  DestType.getNonReferenceType(),
                           diag::err_typecheck_nonviable_condition_incomplete,
@@ -9759,13 +9761,13 @@ bool InitializationSequence::Diagnose(Sema &S,
           << OnlyArg->getType() << Args[0]->getSourceRange()
           << DestType.getNonReferenceType();
 
-      FailedCandidateSet.NoteCandidates(S, Args, Cands);
+      FailedCandidateSet->NoteCandidates(S, Args, Cands);
       break;
     }
     case OR_Deleted: {
       OverloadCandidateSet::iterator Best;
-      OverloadingResult Ovl
-        = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+      OverloadingResult Ovl =
+          FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
 
       StringLiteral *Msg = Best->Function->getDeletedMessage();
       S.Diag(Kind.getLocation(), diag::err_typecheck_deleted_function)
@@ -9949,7 +9951,7 @@ bool InitializationSequence::Diagnose(Sema &S,
     // bad.
     switch (FailedOverloadResult) {
       case OR_Ambiguous:
-        FailedCandidateSet.NoteCandidates(
+        FailedCandidateSet->NoteCandidates(
             PartialDiagnosticAt(Kind.getLocation(),
                                 S.PDiag(diag::err_ovl_ambiguous_init)
                                     << DestType << ArgsRange),
@@ -10003,7 +10005,7 @@ bool InitializationSequence::Diagnose(Sema &S,
           break;
         }
 
-        FailedCandidateSet.NoteCandidates(
+        FailedCandidateSet->NoteCandidates(
             PartialDiagnosticAt(
                 Kind.getLocation(),
                 S.PDiag(diag::err_ovl_no_viable_function_in_init)
@@ -10013,8 +10015,8 @@ bool InitializationSequence::Diagnose(Sema &S,
 
       case OR_Deleted: {
         OverloadCandidateSet::iterator Best;
-        OverloadingResult Ovl
-          = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+        OverloadingResult Ovl =
+            FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
         if (Ovl != OR_Deleted) {
           S.Diag(Kind.getLocation(), diag::err_ovl_deleted_init)
               << DestType << ArgsRange;
@@ -10093,8 +10095,8 @@ bool InitializationSequence::Diagnose(Sema &S,
     S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor)
       << Args[0]->getSourceRange();
     OverloadCandidateSet::iterator Best;
-    OverloadingResult Ovl
-      = FailedCandidateSet.BestViableFunction(S, Kind.getLocation(), Best);
+    OverloadingResult Ovl =
+        FailedCandidateSet->BestViableFunction(S, Kind.getLocation(), Best);
     (void)Ovl;
     assert(Ovl == OR_Success && "Inconsistent overload resolution");
     CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 227ef564ba3e0..bcde0d86cf10f 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1057,8 +1057,7 @@ bool OverloadCandidateSet::OperatorRewriteInfo::shouldAddReversed(
 
 void OverloadCandidateSet::destroyCandidates() {
   for (iterator i = begin(), e = end(); i != e; ++i) {
-    for (auto &C : i->Conversions)
-      C.~ImplicitConversionSequence();
+    delete[] i->Conversions.data();
     if (!i->Viable && i->FailureKind == ovl_fail_bad_deduction)
       i->DeductionFailure.Destroy();
   }
@@ -1066,8 +1065,6 @@ void OverloadCandidateSet::destroyCandidates() {
 
 void OverloadCandidateSet::clear(CandidateSetKind CSK) {
   destroyCandidates();
-  SlabAllocator.Reset();
-  NumInlineBytesUsed = 0;
   Candidates.clear();
   Functions.clear();
   Kind = CSK;
@@ -6983,7 +6980,7 @@ void Sema::AddOverloadCandidate(
   Candidate.RewriteKind =
       CandidateSet.getRewriteInfo().getRewriteKind(Function, PO);
   Candidate.IsSurrogate = false;
-  Candidate.IsADLCandidate = IsADLCandidate;
+  Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
   Candidate.IgnoreObjectArgument = false;
   Candidate.ExplicitCallArguments = Args.size();
 
@@ -7815,7 +7812,7 @@ void Sema::AddTemplateOverloadCandidate(
     Candidate.RewriteKind =
       CandidateSet.getRewriteInfo().getRewriteKind(Candidate.Function, PO);
     Candidate.IsSurrogate = false;
-    Candidate.IsADLCandidate = IsADLCandidate;
+    Candidate.IsADLCandidate = static_cast<unsigned>(IsADLCandidate);
     // Ignore the object argument if there is one, since we don't have an object
     // type.
     Candidate.IgnoreObjectArgument =
@@ -14125,7 +14122,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
       return ExprError();
     return SemaRef.BuildResolvedCallExpr(
         Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
-        /*IsExecConfig=*/false, (*Best)->IsADLCandidate);
+        /*IsExecConfig=*/false,
+        static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
   }
 
   case OR_No_Viable_Function: {
@@ -14184,7 +14182,8 @@ static ExprResult FinishOverloadedCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
       return ExprError();
     return SemaRef.BuildResolvedCallExpr(
         Res.get(), FDecl, LParenLoc, Args, RParenLoc, ExecConfig,
-        /*IsExecConfig=*/false, (*Best)->IsADLCandidate);
+        /*IsExecConfig=*/false,
+        static_cast<CallExpr::ADLCallKind>((*Best)->IsADLCandidate));
   }
   }
 
@@ -14491,7 +14490,8 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
       Args[0] = Input;
       CallExpr *TheCall = CXXOperatorCallExpr::Create(
           Context, Op, FnExpr.get(), ArgsArray, ResultTy, VK, OpLoc,
-          CurFPFeatureOverrides(), Best->IsADLCandidate);
+          CurFPFeatureOverrides(),
+          static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
 
       if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl))
         return ExprError();
@@ -14909,7 +14909,8 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
         // members; CodeGen should take care not to emit the this pointer.
         TheCall = CXXOperatorCallExpr::Create(
             Context, ChosenOp, FnExpr.get(), Args, ResultTy, VK, OpLoc,
-            CurFPFeatureOverrides(), Best->IsADLCandidate);
+            CurFPFeatureOverrides(),
+            static_cast<CallExpr::ADLCallKind>(Best->IsADLCandidate));
 
         if (const auto *Method = dyn_cast<CXXMethodDecl>(FnDecl);
             Method && Method->isImplicitObjectMemberFunction()) {

From aefff774a0d6f75565243263555f2513ac3c9fdf Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav@gmail.com>
Date: Tue, 16 Apr 2024 21:50:22 +0400
Subject: [PATCH 28/58] [clang] Migrate DR tests to `static_assert` (#88611)

This patch touches a number of tests that run in C++98 mode that have
been using array size as a context that requires a constant expression,
replacing it with a `static_assert` backported via a macro. This reduces
noise in expected directives that comes from diagnostics around VLAs.

This patch also showcases that DR tests would benefit from folding in
constant expressions in C++98 mode, but I'm not sure it's even on the
table. If it is, I'd be happy to prepare a PR for that, and rebase this
PR on top of it.

CC @AaronBallman
---
 clang/test/CXX/drs/dr0xx.cpp  | 14 +++++---
 clang/test/CXX/drs/dr16xx.cpp |  5 ++-
 clang/test/CXX/drs/dr1xx.cpp  | 61 +++++++++++++++++++----------------
 clang/test/CXX/drs/dr2xx.cpp  | 15 ++++++---
 clang/test/CXX/drs/dr3xx.cpp  | 38 ++++++++++++----------
 clang/test/CXX/drs/dr4xx.cpp  | 60 +++++++++++++++++-----------------
 clang/test/CXX/drs/dr5xx.cpp  | 13 +++++---
 clang/test/CXX/drs/dr6xx.cpp  |  6 ++--
 8 files changed, 117 insertions(+), 95 deletions(-)

diff --git a/clang/test/CXX/drs/dr0xx.cpp b/clang/test/CXX/drs/dr0xx.cpp
index a304862885c64..6c600bbc7c3f6 100644
--- a/clang/test/CXX/drs/dr0xx.cpp
+++ b/clang/test/CXX/drs/dr0xx.cpp
@@ -5,6 +5,11 @@
 // RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -triple %itanium_abi_triple
 // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors -triple %itanium_abi_triple
 
+#if __cplusplus == 199711L
+#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__)
+// cxx98-error@-1 {{variadic macros are a C99 feature}}
+#endif
+
 namespace cwg1 { // cwg1: no
   namespace X { extern "C" void cwg1_f(int a = 1); }
   namespace Y { extern "C" void cwg1_f(int a = 1); }
@@ -897,7 +902,7 @@ namespace cwg54 { // cwg54: 2.8
 
 namespace cwg55 { // cwg55: yes
   enum E { e = 5 };
-  int test[(e + 1 == 6) ? 1 : -1];
+  static_assert(e + 1 == 6, "");
 }
 
 namespace cwg56 { // cwg56: yes
@@ -1163,10 +1168,9 @@ namespace cwg75 { // cwg75: yes
 
 namespace cwg76 { // cwg76: yes
   const volatile int n = 1;
-  int arr[n]; // #cwg76-vla
-  // expected-error@#cwg76-vla {{variable length arrays in C++ are a Clang extension}}
-  //   expected-note@#cwg76-vla {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}}
-  // expected-error@#cwg76-vla {{variable length array declaration not allowed at file scope}}
+  static_assert(n, "");
+  // expected-error@-1 {{static assertion expression is not an integral constant expression}}
+  //   expected-note@-2 {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}}
 }
 
 namespace cwg77 { // cwg77: yes
diff --git a/clang/test/CXX/drs/dr16xx.cpp b/clang/test/CXX/drs/dr16xx.cpp
index 6d7bb7619f8b8..cf6b45ceabf2c 100644
--- a/clang/test/CXX/drs/dr16xx.cpp
+++ b/clang/test/CXX/drs/dr16xx.cpp
@@ -153,10 +153,9 @@ namespace cwg1645 { // cwg1645: 3.9
 
 namespace cwg1652 { // cwg1652: 3.6
   int a, b;
-  int arr[&a + 1 == &b ? 1 : 2];
-  // expected-error@-1 {{variable length arrays in C++ are a Clang extension}}
+  static_assert(&a + 1 == &b, "");
+  // expected-error@-1 {{static assertion expression is not an integral constant expression}}
   //   expected-note@-2 {{comparison against pointer '&a + 1' that points past the end of a complete object has unspecified value}}
-  // expected-error@-3 {{variable length array declaration not allowed at file scope}}
 }
 
 namespace cwg1653 { // cwg1653: 4 c++17
diff --git a/clang/test/CXX/drs/dr1xx.cpp b/clang/test/CXX/drs/dr1xx.cpp
index 5b497dda047d6..a8f9b705a9866 100644
--- a/clang/test/CXX/drs/dr1xx.cpp
+++ b/clang/test/CXX/drs/dr1xx.cpp
@@ -5,6 +5,17 @@
 // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors
 // RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-unknown %s -verify=expected,since-cxx11,since-cxx17 -fexceptions -fcxx-exceptions -pedantic-errors
 
+#if __cplusplus == 199711L
+#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__)
+// cxx98-error@-1 {{variadic macros are a C99 feature}}
+#endif
+
+#if __cplusplus == 199711L
+#define __enable_constant_folding(x) (__builtin_constant_p(x) ? (x) : (x))
+#else
+#define __enable_constant_folding
+#endif
+
 namespace cwg100 { // cwg100: yes
   template<const char (*)[4]> struct A {}; // #cwg100-A
   template<const char (&)[4]> struct B {}; // #cwg100-B
@@ -736,8 +747,8 @@ namespace cwg147 { // cwg147: yes
 
 namespace cwg148 { // cwg148: yes
   struct A { int A::*p; };
-  int check1[__is_pod(int(A::*)) ? 1 : -1];
-  int check2[__is_pod(A) ? 1 : -1];
+  static_assert(__is_pod(int(A::*)), "");
+  static_assert(__is_pod(A), "");
 }
 
 // cwg149: na
@@ -745,13 +756,7 @@ namespace cwg148 { // cwg148: yes
 namespace cwg151 { // cwg151: 3.1
   struct X {};
   typedef int X::*p;
-#if __cplusplus < 201103L
-#define fold(x) (__builtin_constant_p(0) ? (x) : (x))
-#else
-#define fold
-#endif
-  int check[fold(p() == 0) ? 1 : -1];
-#undef fold
+  static_assert(__enable_constant_folding(p() == 0), "");
 }
 
 namespace cwg152 { // cwg152: yes
@@ -956,42 +961,42 @@ namespace cwg171 {
 
 namespace cwg172 { // cwg172: yes
   enum { zero };
-  int check1[-1 < zero ? 1 : -1];
+  static_assert(-1 < zero, "");
 
   enum { x = -1, y = (unsigned int)-1 };
-  int check2[sizeof(x) > sizeof(int) ? 1 : -1];
+  static_assert(sizeof(x) > sizeof(int), "");
 
   enum { a = (unsigned int)-1 / 2 };
-  int check3a[sizeof(a) == sizeof(int) ? 1 : -1];
-  int check3b[-a < 0 ? 1 : -1];
+  static_assert(sizeof(a) == sizeof(int), "");
+  static_assert(-a < 0, "");
 
   enum { b = (unsigned int)-1 / 2 + 1 };
-  int check4a[sizeof(b) == sizeof(unsigned int) ? 1 : -1];
-  int check4b[-b > 0 ? 1 : -1];
+  static_assert(sizeof(b) == sizeof(unsigned int), "");
+  static_assert(-b > 0, "");
 
   enum { c = (unsigned long)-1 / 2 };
-  int check5a[sizeof(c) == sizeof(long) ? 1 : -1];
-  int check5b[-c < 0 ? 1 : -1];
+  static_assert(sizeof(c) == sizeof(long), "");
+  static_assert(-c < 0, "");
 
   enum { d = (unsigned long)-1 / 2 + 1 };
-  int check6a[sizeof(d) == sizeof(unsigned long) ? 1 : -1];
-  int check6b[-d > 0 ? 1 : -1];
+  static_assert(sizeof(d) == sizeof(unsigned long), "");
+  static_assert(-d > 0, "");
 
   enum { e = (unsigned long long)-1 / 2 };
   // cxx98-error@-1 {{'long long' is a C++11 extension}}
-  int check7a[sizeof(e) == sizeof(long) ? 1 : -1];
-  int check7b[-e < 0 ? 1 : -1];
+  static_assert(sizeof(e) == sizeof(long), "");
+  static_assert(-e < 0, "");
 
   enum { f = (unsigned long long)-1 / 2 + 1 };
   // cxx98-error@-1 {{'long long' is a C++11 extension}}
-  int check8a[sizeof(f) == sizeof(unsigned long) ? 1 : -1];
-  int check8b[-f > 0 ? 1 : -1];
+  static_assert(sizeof(f) == sizeof(unsigned long), "");
+  static_assert(-f > 0, "");
 }
 
 namespace cwg173 { // cwg173: yes
-  int check[('0' + 1 == '1' && '0' + 2 == '2' && '0' + 3 == '3' &&
-             '0' + 4 == '4' && '0' + 5 == '5' && '0' + 6 == '6' &&
-             '0' + 7 == '7' && '0' + 8 == '8' && '0' + 9 == '9') ? 1 : -1];
+  static_assert('0' + 1 == '1' && '0' + 2 == '2' && '0' + 3 == '3' &&
+                '0' + 4 == '4' && '0' + 5 == '5' && '0' + 6 == '6' &&
+                '0' + 7 == '7' && '0' + 8 == '8' && '0' + 9 == '9', "");
 }
 
 // cwg174: sup 1012
@@ -1070,7 +1075,7 @@ namespace cwg177 { // cwg177: yes
 }
 
 namespace cwg178 { // cwg178: yes
-  int check[int() == 0 ? 1 : -1];
+  static_assert(int() == 0, "");
 #if __cplusplus >= 201103L
   static_assert(int{} == 0, "");
   struct S { int a, b; };
@@ -1180,7 +1185,7 @@ namespace cwg187 { // cwg187: sup 481
 
 namespace cwg188 { // cwg188: yes
   char c[10];
-  int check[sizeof(0, c) == 10 ? 1 : -1];
+  static_assert(sizeof(0, c) == 10, "");
 }
 
 // cwg190 FIXME: add codegen test for tbaa
diff --git a/clang/test/CXX/drs/dr2xx.cpp b/clang/test/CXX/drs/dr2xx.cpp
index e655e7226d51d..5d3e8ce4bea3b 100644
--- a/clang/test/CXX/drs/dr2xx.cpp
+++ b/clang/test/CXX/drs/dr2xx.cpp
@@ -10,10 +10,15 @@
 typedef __SIZE_TYPE__ size_t;
 // cxx98-error@-1 0-1 {{'long long' is a C++11 extension}}
 
-#if __cplusplus < 201103L
-#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
+#if __cplusplus == 199711L
+#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__)
+// cxx98-error@-1 {{variadic macros are a C99 feature}}
+#endif
+
+#if __cplusplus == 199711L
+#define __enable_constant_folding(x) (__builtin_constant_p(x) ? (x) : (x))
 #else
-#define fold
+#define __enable_constant_folding
 #endif
 
 namespace cwg200 { // cwg200: dup 214
@@ -31,7 +36,7 @@ namespace cwg200 { // cwg200: dup 214
 namespace cwg202 { // cwg202: 3.1
   template<typename T> T f();
   template<int (*g)()> struct X {
-    int arr[fold(g == &f<int>) ? 1 : -1];
+    static_assert(__enable_constant_folding(g == &f<int>), "");
   };
   template struct X<f>;
 }
@@ -1024,7 +1029,7 @@ namespace cwg275 { // cwg275: no
 namespace cwg277 { // cwg277: 3.1
   typedef int *intp;
   int *p = intp();
-  int a[fold(intp() ? -1 : 1)];
+  static_assert(__enable_constant_folding(!intp()), "");
 }
 
 namespace cwg280 { // cwg280: 2.9
diff --git a/clang/test/CXX/drs/dr3xx.cpp b/clang/test/CXX/drs/dr3xx.cpp
index 6d1c6958ac8eb..3e9228fe21fb6 100644
--- a/clang/test/CXX/drs/dr3xx.cpp
+++ b/clang/test/CXX/drs/dr3xx.cpp
@@ -5,6 +5,17 @@
 // RUN: %clang_cc1 -std=c++11 -verify=expected,cxx98-14,cxx98-17,cxx98-20,cxx11-14,since-cxx11 -triple %itanium_abi_triple %s -fexceptions -fcxx-exceptions -pedantic-errors
 // RUN: %clang_cc1 -std=c++98 -verify=expected,cxx98-14,cxx98-17,cxx98-20,cxx98 -triple %itanium_abi_triple %s -fexceptions -fcxx-exceptions -pedantic-errors
 
+#if __cplusplus == 199711L
+#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__)
+// cxx98-error@-1 {{variadic macros are a C99 feature}}
+#endif
+
+#if __cplusplus == 199711L
+#define __enable_constant_folding(x) (__builtin_constant_p(x) ? (x) : (x))
+#else
+#define __enable_constant_folding
+#endif
+
 namespace cwg300 { // cwg300: yes
   template<typename R, typename A> void f(R (&)(A)) {}
   int g(int);
@@ -396,7 +407,7 @@ namespace cwg324 { // cwg324: 3.6
 
 namespace cwg326 { // cwg326: 3.1
   struct S {};
-  int test[__is_trivially_constructible(S, const S&) ? 1 : -1];
+  static_assert(__is_trivially_constructible(S, const S&), "");
 }
 
 namespace cwg327 { // cwg327: dup 538
@@ -653,7 +664,7 @@ namespace cwg339 { // cwg339: 2.8
 
   template<typename T> A<sizeof(f(T()))> make_A();
 
-  int a[conv_int<char>::value ? 1 : -1];
+  static_assert(conv_int<char>::value, "");
   bool b = conv_int2<char>(A<1>());
   A<1> c = make_A<char>();
 }
@@ -1099,21 +1110,14 @@ namespace cwg364 { // cwg364: yes
 #endif
 
 namespace cwg367 { // cwg367: yes
-  // FIXME: These diagnostics are terrible. Don't diagnose an ill-formed global
-  // array as being a VLA!
-  int a[true ? throw 0 : 4];
-  // expected-error@-1 {{variable length arrays in C++ are a Clang extension}}
-  // expected-error@-2 {{variable length array declaration not allowed at file scope}}
-  int b[true ? 4 : throw 0];
-  // cxx98-error@-1 {{variable length arrays in C++ are a Clang extension}}
-  // cxx98-error@-2 {{variable length array folded to constant array as an extension}}
-  int c[true ? *new int : 4];
-  // expected-error@-1 {{variable length arrays in C++ are a Clang extension}}
+  static_assert(__enable_constant_folding(true ? throw 0 : 4), "");
+  // expected-error@-1 {{expression is not an integral constant expression}}
+  static_assert(__enable_constant_folding(true ? 4 : throw 0), "");
+  static_assert(__enable_constant_folding(true ? *new int : 4), "");
+  // expected-error@-1 {{expression is not an integral constant expression}}
   //   expected-note@-2 {{read of uninitialized object is not allowed in a constant expression}}
-  // expected-error@-3 {{variable length array declaration not allowed at file scope}}
-  int d[true ? 4 : *new int];
-  // cxx98-error@-1 {{variable length arrays in C++ are a Clang extension}}
-  // cxx98-error@-2 {{variable length array folded to constant array as an extension}}
+  static_assert(__enable_constant_folding(true ? 4 : *new int), "");
+
 }
 
 namespace cwg368 { // cwg368: 3.6
@@ -1325,7 +1329,7 @@ namespace cwg383 { // cwg383: yes
   struct B { ~B(); };
   union C { C &operator=(const C&); };
   union D { ~D(); };
-  int check[(__is_pod(A) || __is_pod(B) || __is_pod(C) || __is_pod(D)) ? -1 : 1];
+  static_assert(!__is_pod(A) && !__is_pod(B) && !__is_pod(C) && !__is_pod(D), "");
 }
 
 namespace cwg384 { // cwg384: yes
diff --git a/clang/test/CXX/drs/dr4xx.cpp b/clang/test/CXX/drs/dr4xx.cpp
index 611b791470785..07162cc28f6b6 100644
--- a/clang/test/CXX/drs/dr4xx.cpp
+++ b/clang/test/CXX/drs/dr4xx.cpp
@@ -6,6 +6,11 @@
 // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=0 %clang_cc1 -std=c++23 %s -verify=expected,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors
 // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=0 %clang_cc1 -std=c++2c %s -verify=expected,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors
 
+#if __cplusplus == 199711L
+#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__)
+// cxx98-error@-1 {{variadic macros are a C99 feature}}
+#endif
+
 // FIXME: __SIZE_TYPE__ expands to 'long long' on some targets.
 __extension__ typedef __SIZE_TYPE__ size_t;
 
@@ -217,7 +222,7 @@ namespace cwg407 { // cwg407: 3.8
 }
 
 namespace cwg408 { // cwg408: 3.4
-  template<int N> void g() { int arr[N != 1 ? 1 : -1]; }
+  template<int N> void g() { static_assert(N != 1, ""); }
   template<> void g<2>() { }
 
   template<typename T> struct S {
@@ -239,7 +244,7 @@ namespace cwg408 { // cwg408: 3.4
   };
   template<typename T> int R<T>::arr[1];
   template<typename T> void R<T>::f() {
-    int arr[sizeof(arr) != sizeof(int) ? 1 : -1];
+    static_assert(sizeof(arr) != sizeof(int), "");
   }
   template<> int R<int>::arr[2];
   template void R<int>::f();
@@ -842,11 +847,10 @@ namespace cwg451 { // cwg451: yes
   // expected-warning@-1 {{division by zero is undefined}}
   const int b = 1 / 0; // #cwg451-b
   // expected-warning@-1 {{division by zero is undefined}}
-  int arr[b]; // #cwg451-arr
-  // expected-error@-1 {{variable length arrays in C++ are a Clang extension}}
+  static_assert(b, "");
+  // expected-error@-1 {{expression is not an integral constant expression}}
   //   expected-note@-2 {{initializer of 'b' is not a constant expression}}
   //   expected-note@#cwg451-b {{declared here}}
-  // expected-error@#cwg451-arr {{variable length array declaration not allowed at file scope}}
 }
 
 namespace cwg452 { // cwg452: yes
@@ -876,11 +880,10 @@ namespace cwg456 { // cwg456: yes
 namespace cwg457 { // cwg457: yes
   const int a = 1;
   const volatile int b = 1;
-  int ax[a];
-  int bx[b];
-  // expected-error@-1 {{variable length arrays in C++ are a Clang extension}}
+  static_assert(a, "");
+  static_assert(b, "");
+  // expected-error@-1 {{expression is not an integral constant expression}}
   //   expected-note@-2 {{read of volatile-qualified type 'const volatile int' is not allowed in a constant expression}}
-  // expected-error@-3 {{variable length array declaration not allowed at file scope}}
 
   enum E {
     ea = a,
@@ -1276,20 +1279,18 @@ namespace cwg482 { // cwg482: 3.5
 
 namespace cwg483 { // cwg483: yes
   namespace climits {
-    int check1[__SCHAR_MAX__ >= 127 ? 1 : -1];
-    int check2[__SHRT_MAX__ >= 32767 ? 1 : -1];
-    int check3[__INT_MAX__ >= 32767 ? 1 : -1];
-    int check4[__LONG_MAX__ >= 2147483647 ? 1 : -1];
-    int check5[__LONG_LONG_MAX__ >= 9223372036854775807 ? 1 : -1];
-    // cxx98-error@-1 {{'long long' is a C++11 extension}}
-    // cxx98-error@-2 0-1{{'long long' is a C++11 extension}}
+    static_assert(__SCHAR_MAX__ >= 127, "");
+    static_assert(__SHRT_MAX__ >= 32767, "");
+    static_assert(__INT_MAX__ >= 32767, "");
+    static_assert(__LONG_MAX__ >= 2147483647, "");
+    static_assert(__LONG_LONG_MAX__ >= 9223372036854775807, "");
   }
   namespace cstdint {
-    int check1[__PTRDIFF_WIDTH__ >= 16 ? 1 : -1];
-    int check2[__SIG_ATOMIC_WIDTH__ >= 8 ? 1 : -1];
-    int check3[__SIZE_WIDTH__ >= 16 ? 1 : -1];
-    int check4[__WCHAR_WIDTH__ >= 8 ? 1 : -1];
-    int check5[__WINT_WIDTH__ >= 16 ? 1 : -1];
+    static_assert(__PTRDIFF_WIDTH__ >= 16, "");
+    static_assert(__SIG_ATOMIC_WIDTH__ >= 8, "");
+    static_assert(__SIZE_WIDTH__ >= 16, "");
+    static_assert(__WCHAR_WIDTH__ >= 8, "");
+    static_assert(__WINT_WIDTH__ >= 16, "");
   }
 }
 
@@ -1366,11 +1367,10 @@ namespace cwg486 { // cwg486: yes
 namespace cwg487 { // cwg487: yes
   enum E { e };
   int operator+(int, E); // #cwg487-operator-plus
-  int i[4 + e]; // #cwg487-i
-  // expected-error@-1 {{variable length arrays in C++ are a Clang extension}}
+  static_assert(4 + e, "");
+  // expected-error@-1 {{expression is not an integral constant expression}}
   //   since-cxx11-note@-2 {{non-constexpr function 'operator+' cannot be used in a constant expression}}
   //   since-cxx11-note@#cwg487-operator-plus {{declared here}}
-  // expected-error@#cwg487-i {{variable length array declaration not allowed at file scope}}
 }
 
 namespace cwg488 { // cwg488: yes c++11
@@ -1485,13 +1485,13 @@ namespace cwg495 { // cwg495: 3.5
 namespace cwg496 { // cwg496: sup 2094
   struct A { int n; };
   struct B { volatile int n; };
-  int check1[ __is_trivially_copyable(const int) ? 1 : -1];
+  static_assert(__is_trivially_copyable(const int), "");
   // This checks the cwg2094 behavior, not cwg496
-  int check2[ __is_trivially_copyable(volatile int) ? 1 : -1];
-  int check3[ __is_trivially_constructible(A, const A&) ? 1 : -1];
-  int check4[ __is_trivially_constructible(B, const B&) ? 1 : -1];
-  int check5[ __is_trivially_assignable(A, const A&) ? 1 : -1];
-  int check6[ __is_trivially_assignable(B, const B&) ? 1 : -1];
+  static_assert(__is_trivially_copyable(volatile int), "");
+  static_assert(__is_trivially_constructible(A, const A&), "");
+  static_assert(__is_trivially_constructible(B, const B&), "");
+  static_assert(__is_trivially_assignable(A, const A&), "");
+  static_assert(__is_trivially_assignable(B, const B&), "");
 }
 
 namespace cwg497 { // cwg497: sup 253
diff --git a/clang/test/CXX/drs/dr5xx.cpp b/clang/test/CXX/drs/dr5xx.cpp
index 0fe64102d70b0..9d890f981348a 100644
--- a/clang/test/CXX/drs/dr5xx.cpp
+++ b/clang/test/CXX/drs/dr5xx.cpp
@@ -5,6 +5,11 @@
 // RUN: %clang_cc1 -std=c++20 %s -verify=expected,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors
 // RUN: %clang_cc1 -std=c++23 %s -verify=expected,since-cxx23,since-cxx20,since-cxx17,since-cxx11 -fexceptions -fcxx-exceptions -pedantic-errors
 
+#if __cplusplus == 199711L
+#define static_assert(...) __extension__ _Static_assert(__VA_ARGS__)
+// cxx98-error@-1 {{variadic macros are a C99 feature}}
+#endif
+
 // FIXME: This is included to avoid a diagnostic with no source location
 // pointing at the implicit operator new. We can't match such a diagnostic
 // with -verify.
@@ -819,7 +824,7 @@ namespace cwg565 { // cwg565: yes
 
 namespace cwg566 { // cwg566: yes
 #if __cplusplus >= 201103L
-  int check[int(-3.99) == -3 ? 1 : -1];
+  static_assert(int(-3.99) == -3, "");
 #endif
 }
 
@@ -834,7 +839,7 @@ namespace cwg568 { // cwg568: 3.0 c++11
   public:
     int n;
   };
-  int check_trivial[__is_trivial(trivial) ? 1 : -1];
+  static_assert(__is_trivial(trivial), "");
 
   struct std_layout {
     std_layout();
@@ -843,7 +848,7 @@ namespace cwg568 { // cwg568: 3.0 c++11
   private:
     int n;
   };
-  int check_std_layout[__is_standard_layout(std_layout) ? 1 : -1];
+  static_assert(__is_standard_layout(std_layout), "");
 
   struct aggregate {
     int x;
@@ -885,7 +890,7 @@ namespace cwg570 { // cwg570: dup 633
 
 namespace cwg572 { // cwg572: yes
   enum E { a = 1, b = 2 };
-  int check[a + b == 3 ? 1 : -1];
+  static_assert(a + b == 3, "");
 }
 
 namespace cwg573 { // cwg573: no
diff --git a/clang/test/CXX/drs/dr6xx.cpp b/clang/test/CXX/drs/dr6xx.cpp
index 9d3613ae8589e..069102d9c5975 100644
--- a/clang/test/CXX/drs/dr6xx.cpp
+++ b/clang/test/CXX/drs/dr6xx.cpp
@@ -144,7 +144,7 @@ namespace cwg608 { // cwg608: yes
   struct D : B, C {};
 }
 
-int cwg610[-0u == 0u ? 1 : -1]; // cwg610: yes
+static_assert(-0u == 0u, ""); // cwg610: yes
 
 namespace cwg611 { // cwg611: yes
   int k;
@@ -190,8 +190,8 @@ namespace cwg613 { // cwg613: yes c++11
   }
 }
 
-int cwg614_a[(-1) / 2 == 0 ? 1 : -1]; // cwg614: yes
-int cwg614_b[(-1) % 2 == -1 ? 1 : -1];
+static_assert((-1) / 2 == 0, ""); // cwg614: yes
+static_assert((-1) % 2 == -1, "");
 
 namespace cwg615 { // cwg615: yes
   int f();

From 6b83fe552990966fdad0e5693a79b02b87d9526e Mon Sep 17 00:00:00 2001
From: Philip Reames <preames@rivosinc.com>
Date: Tue, 16 Apr 2024 11:03:53 -0700
Subject: [PATCH 29/58] [RISCV] Strength reduce mul by 2^n + 2/4/8 + 1 (#88911)

With zba, we can expand this to (add (shl X, C1), (shXadd X, X)).

Note that this is our first expansion to a three instruction sequence. I
believe this to generally be a reasonable tradeoff for most architectures,
but we may want to (someday) consider a tuning flag here.

I plan to support 2^n + (2/4/8 + 1) eventually as well, but that comes
behind 2^N - 2^M. Both are also three instruction sequences.

---------

Co-authored-by: Min-Yih Hsu <min@myhsu.dev>
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 37 +++++++++++++++
 llvm/test/CodeGen/RISCV/rv64zba.ll          | 51 +++++++++++++++------
 2 files changed, 73 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index de2ad639f0d6c..dc7c6f83b9857 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13437,6 +13437,43 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
     }
   }
+
+  // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
+  // Matched in tablegen, avoid perturbing patterns.
+  switch (MulAmt) {
+  case 11:
+  case 13:
+  case 19:
+  case 21:
+  case 25:
+  case 27:
+  case 29:
+  case 37:
+  case 41:
+  case 45:
+  case 73:
+  case 91:
+    return SDValue();
+  default:
+    break;
+  }
+
+  // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
+  if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
+    unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
+    if (ScaleShift >= 1 && ScaleShift < 4) {
+      unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
+      SDLoc DL(N);
+      SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ShiftAmt, DL, VT));
+      SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ScaleShift, DL, VT));
+      return DAG.getNode(
+          ISD::ADD, DL, VT, Shift1,
+          DAG.getNode(ISD::ADD, DL, VT, Shift2, N->getOperand(0)));
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index c3c757656be93..b4c80b60e0bad 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -598,31 +598,52 @@ define i64 @mul125(i64 %a) {
 }
 
 define i64 @mul131(i64 %a) {
-; CHECK-LABEL: mul131:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 131
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul131:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 131
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul131:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 131
   ret i64 %c
 }
 
 define i64 @mul133(i64 %a) {
-; CHECK-LABEL: mul133:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 133
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul133:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 133
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul133:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 133
   ret i64 %c
 }
 
 define i64 @mul137(i64 %a) {
-; CHECK-LABEL: mul137:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 137
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul137:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 137
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul137:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 137
   ret i64 %c
 }

From 1c2afbae9af22b58190c10e3517242d01d89d612 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 16 Apr 2024 11:05:45 -0700
Subject: [PATCH 30/58] [CodeGen,test] Test llvm-libc style alias attribute
 with UsingShadowDecl

The pattern is quite involved and deserves a specific codegen test.
This test would catch the bug in the first attempt at #87130.
---
 clang/test/CodeGen/alias.cpp | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/clang/test/CodeGen/alias.cpp b/clang/test/CodeGen/alias.cpp
index 17c1e1ae32f03..a468c31d369ed 100644
--- a/clang/test/CodeGen/alias.cpp
+++ b/clang/test/CodeGen/alias.cpp
@@ -1,27 +1,42 @@
-// RUN: %clang_cc1 -triple x86_64-linux -verify -emit-llvm-only %s
-// RUN: not %clang_cc1 -triple x86_64-linux -emit-llvm-only -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -verify -emit-llvm-only -DERR %s
+// RUN: not %clang_cc1 -triple x86_64-linux -emit-llvm-only -fdiagnostics-parseable-fixits -DERR %s 2>&1 | FileCheck %s --check-prefix=FIXIT
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s
 
+#ifdef ERR
 void *f1_ifunc(void) { return nullptr; }
 void f1(void) __attribute__((alias("f1_ifunc")));
 // expected-error@-1 {{alias must point to a defined variable or function}}
 // expected-note@-2 {{must refer to its mangled name}}
 // expected-note@-3 {{function by that name is mangled as}}
-// CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:47}:"alias(\"_Z8f1_ifuncv\")"
+// FIXIT: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:47}:"alias(\"_Z8f1_ifuncv\")"
 
 void *f6_resolver_resolver(void) { return 0; }
 void *f6_resolver(void) __attribute__((alias("f6_resolver_resolver")));
 // expected-error@-1 {{alias must point to a defined variable or function}}
 // expected-note@-2 {{must refer to its mangled name}}
 // expected-note@-3 {{function by that name is mangled as}}
-// CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:40-[[@LINE-4]]:69}:"alias(\"_Z20f6_resolver_resolverv\")"
+// FIXIT: fix-it:"{{.*}}":{[[@LINE-4]]:40-[[@LINE-4]]:69}:"alias(\"_Z20f6_resolver_resolverv\")"
 void f6(void) __attribute__((alias("f6_resolver")));
 // expected-error@-1 {{alias must point to a defined variable or function}}
 // expected-note@-2 {{must refer to its mangled name}}
 // expected-note@-3 {{function by that name is mangled as}}
-// CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:50}:"alias(\"_Z11f6_resolverv\")"
+// FIXIT: fix-it:"{{.*}}":{[[@LINE-4]]:30-[[@LINE-4]]:50}:"alias(\"_Z11f6_resolverv\")"
 
 __attribute__((unused, alias("resolver"), deprecated("hahahaha, isn't C great?")))
 void func();
 // expected-error@-2 {{alias must point to a defined variable or function}}
 // expected-note@-3 {{must refer to its mangled name}}
+#endif
 
+// CHECK: @_ZN4libc4log2Ed ={{.*}} alias double (double), ptr @log2
+// CHECK: define{{.*}} @log2(
+namespace libc { double log2(double x); }
+extern "C" double log2(double);
+namespace std { using ::log2; }
+using std::log2;
+
+namespace libc {
+decltype(libc::log2) __log2_impl__ __asm__("log2");
+decltype(libc::log2) log2 [[gnu::alias("log2")]];
+double __log2_impl__(double x) { return x; }
+}

From 5462b27026dee886fb896980d6ad9487200a6cbe Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 16 Apr 2024 20:07:03 +0200
Subject: [PATCH 31/58] [NFC][libc++][TZDB] Refactors argument order. (#85781)

Putting the output reference argument first looks more sensible.
---
 libcxx/include/__chrono/formatter.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h
index 4ad59382a4148..b64cae529a294 100644
--- a/libcxx/include/__chrono/formatter.h
+++ b/libcxx/include/__chrono/formatter.h
@@ -79,7 +79,7 @@ namespace __formatter {
 // small). Therefore a duration uses its own conversion.
 template <class _CharT, class _Rep, class _Period>
 _LIBCPP_HIDE_FROM_ABI void
-__format_sub_seconds(const chrono::duration<_Rep, _Period>& __value, basic_stringstream<_CharT>& __sstr) {
+__format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::duration<_Rep, _Period>& __value) {
   __sstr << std::use_facet<numpunct<_CharT>>(__sstr.getloc()).decimal_point();
 
   using __duration = chrono::duration<_Rep, _Period>;
@@ -110,13 +110,13 @@ __format_sub_seconds(const chrono::duration<_Rep, _Period>& __value, basic_strin
 }
 
 template <class _CharT, __is_time_point _Tp>
-_LIBCPP_HIDE_FROM_ABI void __format_sub_seconds(const _Tp& __value, basic_stringstream<_CharT>& __sstr) {
-  __formatter::__format_sub_seconds(__value.time_since_epoch(), __sstr);
+_LIBCPP_HIDE_FROM_ABI void __format_sub_seconds(basic_stringstream<_CharT>& __sstr, const _Tp& __value) {
+  __formatter::__format_sub_seconds(__sstr, __value.time_since_epoch());
 }
 
 template <class _CharT, class _Duration>
 _LIBCPP_HIDE_FROM_ABI void
-__format_sub_seconds(const chrono::hh_mm_ss<_Duration>& __value, basic_stringstream<_CharT>& __sstr) {
+__format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::hh_mm_ss<_Duration>& __value) {
   __sstr << std::use_facet<numpunct<_CharT>>(__sstr.getloc()).decimal_point();
   if constexpr (chrono::treat_as_floating_point_v<typename _Duration::rep>)
     std::format_to(std::ostreambuf_iterator<_CharT>{__sstr},
@@ -143,7 +143,7 @@ consteval bool __use_fraction() {
 }
 
 template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void __format_year(int __year, basic_stringstream<_CharT>& __sstr) {
+_LIBCPP_HIDE_FROM_ABI void __format_year(basic_stringstream<_CharT>& __sstr, int __year) {
   if (__year < 0) {
     __sstr << _CharT('-');
     __year = -__year;
@@ -159,7 +159,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_year(int __year, basic_stringstream<_CharT>&
 }
 
 template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void __format_century(int __year, basic_stringstream<_CharT>& __sstr) {
+_LIBCPP_HIDE_FROM_ABI void __format_century(basic_stringstream<_CharT>& __sstr, int __year) {
   // TODO FMT Write an issue
   // [tab:time.format.spec]
   //   %C The year divided by 100 using floored division. If the result is a
@@ -172,7 +172,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_century(int __year, basic_stringstream<_Char
 
 template <class _CharT, class _Tp>
 _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs(
-    const _Tp& __value, basic_stringstream<_CharT>& __sstr, basic_string_view<_CharT> __chrono_specs) {
+    basic_stringstream<_CharT>& __sstr, const _Tp& __value, basic_string_view<_CharT> __chrono_specs) {
   tm __t              = std::__convert_to_tm<tm>(__value);
   const auto& __facet = std::use_facet<time_put<_CharT>>(__sstr.getloc());
   for (auto __it = __chrono_specs.begin(); __it != __chrono_specs.end(); ++__it) {
@@ -196,7 +196,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs(
         // strftime's output is only defined in the range [00, 99].
         int __year = __t.tm_year + 1900;
         if (__year < 1000 || __year > 9999)
-          __formatter::__format_century(__year, __sstr);
+          __formatter::__format_century(__sstr, __year);
         else
           __facet.put(
               {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1));
@@ -242,7 +242,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs(
         __facet.put(
             {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1));
         if constexpr (__use_fraction<_Tp>())
-          __formatter::__format_sub_seconds(__value, __sstr);
+          __formatter::__format_sub_seconds(__sstr, __value);
         break;
 
         // Unlike time_put and strftime the formatting library requires %Y
@@ -283,13 +283,13 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs(
         // Depending on the platform's libc the range of supported years is
         // limited. Intead of of testing all conditions use the internal
         // implementation unconditionally.
-        __formatter::__format_year(__t.tm_year + 1900, __sstr);
+        __formatter::__format_year(__sstr, __t.tm_year + 1900);
         break;
 
       case _CharT('F'): {
         int __year = __t.tm_year + 1900;
         if (__year < 1000) {
-          __formatter::__format_year(__year, __sstr);
+          __formatter::__format_year(__sstr, __year);
           __sstr << std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "-{:02}-{:02}"), __t.tm_mon + 1, __t.tm_mday);
         } else
           __facet.put(
@@ -310,7 +310,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs(
             ++__it;
             __facet.put(
                 {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1));
-            __formatter::__format_sub_seconds(__value, __sstr);
+            __formatter::__format_sub_seconds(__sstr, __value);
             break;
           }
         }
@@ -512,7 +512,7 @@ __format_chrono(const _Tp& __value,
     if constexpr (chrono::__is_duration<_Tp>::value) {
       if (__value < __value.zero())
         __sstr << _CharT('-');
-      __formatter::__format_chrono_using_chrono_specs(chrono::abs(__value), __sstr, __chrono_specs);
+      __formatter::__format_chrono_using_chrono_specs(__sstr, chrono::abs(__value), __chrono_specs);
       // TODO FMT When keeping the precision it will truncate the string.
       // Note that the behaviour what the precision does isn't specified.
       __specs.__precision_ = -1;
@@ -556,7 +556,7 @@ __format_chrono(const _Tp& __value,
           __sstr << _CharT('-');
       }
 
-      __formatter::__format_chrono_using_chrono_specs(__value, __sstr, __chrono_specs);
+      __formatter::__format_chrono_using_chrono_specs(__sstr, __value, __chrono_specs);
     }
   }
 

From a75c9d059791f5d175f6c263d114d59e51b46120 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 16 Apr 2024 20:18:34 +0200
Subject: [PATCH 32/58] [NFC][libc++] Moves ios_base's forward declaration.
 (#88027)

According to our synopsis it belonged to ios_fwd. This is not true in
the C++11 version of the Standard, I did not validate against C++98.

Moving this to ios's forward where it's declared in the standard allows
removing a module quirk. An earlier removal of std::vectors forward
declaration allows to remove all quirks for the iosfwd module part.

Since iosfwd includes __fwd/ios.h this does not change the required
includes.
---
 libcxx/include/__fwd/ios.h          | 2 ++
 libcxx/include/iosfwd               | 3 ---
 libcxx/utils/libcxx/test/modules.py | 2 --
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/__fwd/ios.h b/libcxx/include/__fwd/ios.h
index 82c865d58cc75..48350709d4ce2 100644
--- a/libcxx/include/__fwd/ios.h
+++ b/libcxx/include/__fwd/ios.h
@@ -18,6 +18,8 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+class _LIBCPP_EXPORTED_FROM_ABI ios_base;
+
 template <class _CharT, class _Traits = char_traits<_CharT> >
 class _LIBCPP_TEMPLATE_VIS basic_ios;
 
diff --git a/libcxx/include/iosfwd b/libcxx/include/iosfwd
index 9af5e05031850..2481667dd972c 100644
--- a/libcxx/include/iosfwd
+++ b/libcxx/include/iosfwd
@@ -25,7 +25,6 @@ template<>            struct char_traits<wchar_t>;
 
 template<class T>     class allocator;
 
-class ios_base;
 template <class charT, class traits = char_traits<charT> > class basic_ios;
 
 template <class charT, class traits = char_traits<charT> > class basic_streambuf;
@@ -124,8 +123,6 @@ using wosyncstream = basic_osyncstream<wchar_t>;  // C++20
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-class _LIBCPP_EXPORTED_FROM_ABI ios_base;
-
 template <class _CharT, class _Traits = char_traits<_CharT> >
 class _LIBCPP_TEMPLATE_VIS istreambuf_iterator;
 template <class _CharT, class _Traits = char_traits<_CharT> >
diff --git a/libcxx/utils/libcxx/test/modules.py b/libcxx/utils/libcxx/test/modules.py
index 3f3c7999a1a21..44c6292ff1140 100644
--- a/libcxx/utils/libcxx/test/modules.py
+++ b/libcxx/utils/libcxx/test/modules.py
@@ -26,8 +26,6 @@
 # The operators are added for private types like __iom_t10.
 SkipDeclarations["iomanip"] = ["std::operator<<", "std::operator>>"]
 
-SkipDeclarations["iosfwd"] = ["std::ios_base", "std::vector"]
-
 # This header also provides declarations in the namespace that might be
 # an error.
 SkipDeclarations["filesystem"] = [

From 9cd3e92f05fcc2c9168a7abc56d08f0d33bfdfdf Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 16 Apr 2024 20:19:53 +0200
Subject: [PATCH 33/58] [libc++][modules] Removes some validation quirks.
 (#88031)

Recent unrelated header cleanups caused these quirks to become obsolete.
---
 libcxx/utils/libcxx/test/modules.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/libcxx/utils/libcxx/test/modules.py b/libcxx/utils/libcxx/test/modules.py
index 44c6292ff1140..aab7651c7bb03 100644
--- a/libcxx/utils/libcxx/test/modules.py
+++ b/libcxx/utils/libcxx/test/modules.py
@@ -52,8 +52,6 @@
     "std::operator==",
 ]
 
-# Declared in the forward header since std::string uses std::allocator
-SkipDeclarations["string"] = ["std::allocator"]
 # TODO MODULES remove zombie names
 # https://libcxx.llvm.org/Status/Cxx20.html#note-p0619
 SkipDeclarations["memory"] = [
@@ -61,9 +59,6 @@
     "std::get_temporary_buffer",
 ]
 
-# TODO MODULES this should be part of ios instead
-SkipDeclarations["streambuf"] = ["std::basic_ios"]
-
 # include/__type_traits/is_swappable.h
 SkipDeclarations["type_traits"] = [
     "std::swap",

From 41a830500aa5556a65198607ec751d8e3254c949 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 16 Apr 2024 20:20:37 +0200
Subject: [PATCH 34/58] [libc++] Removes deprecated
 _LIBCPP_ENABLE_<VERSION>_REMOVED_FEATURES macros (#88548)

We marked those macros as deprecated in the last release with the intent
of
removing them in LLVM 19. This commit performs the removal.
---
 libcxx/docs/ReleaseNotes/19.rst               |  2 +-
 libcxx/docs/UsingLibcxx.rst                   | 12 -------
 libcxx/include/__config                       | 26 --------------
 ...le_removed_cpp17_features.compile.pass.cpp | 36 -------------------
 ...moved_cpp17_features.deprecated.verify.cpp | 20 -----------
 ...moved_cpp20_features.deprecated.verify.cpp | 20 -----------
 6 files changed, 1 insertion(+), 115 deletions(-)
 delete mode 100644 libcxx/test/libcxx/depr/enable_removed_cpp17_features.compile.pass.cpp
 delete mode 100644 libcxx/test/libcxx/depr/enable_removed_cpp17_features.deprecated.verify.cpp
 delete mode 100644 libcxx/test/libcxx/depr/enable_removed_cpp20_features.deprecated.verify.cpp

diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index 45aac88e45502..53cc7a77d1af4 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -97,7 +97,7 @@ Deprecations and Removals
 - The ``_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION``
   macros have been removed in LLVM 19.
 
-- TODO: The ``_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES`` macros have
+- The ``_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES`` and ``_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES`` macros have
   been removed in LLVM 19. C++17 and C++20 removed features can still be re-enabled individually.
 
 - The ``_LIBCPP_INLINE_VISIBILITY`` and ``_VSTD`` macros have been removed in LLVM 19.
diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst
index c0e85ad4d5e24..8f945656de1ca 100644
--- a/libcxx/docs/UsingLibcxx.rst
+++ b/libcxx/docs/UsingLibcxx.rst
@@ -208,12 +208,6 @@ safety annotations.
 
 C++17 Specific Configuration Macros
 -----------------------------------
-**_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES**:
-  This macro is used to re-enable all the features removed in C++17. The effect
-  is equivalent to manually defining each macro listed below.
-  This macro is deprecated and will be removed in LLVM-19. Use the
-  individual macros listed below.
-
 **_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR**:
   This macro is used to re-enable `auto_ptr`.
 
@@ -238,12 +232,6 @@ C++20 Specific Configuration Macros
   This macro is used to re-enable the function
   ``std::shared_ptr<...>::unique()``.
 
-**_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES**:
-  This macro is used to re-enable all the features removed in C++20. The effect
-  is equivalent to manually defining each macro listed below.
-  This macro is deprecated and will be removed in LLVM-19. Use the
-  individual macros listed below.
-
 **_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS**:
   This macro is used to re-enable the `argument_type`, `result_type`,
   `first_argument_type`, and `second_argument_type` members of class
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 9b4155af1e3c6..4ccef2ca0d73b 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -16,17 +16,6 @@
 #  pragma GCC system_header
 #endif
 
-#if defined(_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
-#  pragma clang deprecated(                                                                                            \
-      _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES,                                                                           \
-      "_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES is deprecated in LLVM 18 and will be removed in LLVM 19")
-#endif
-#if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
-#  pragma clang deprecated(                                                                                            \
-      _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES,                                                                           \
-      "_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES is deprecated in LLVM 18 and will be removed in LLVM 19")
-#endif
-
 #if defined(__apple_build_version__)
 // Given AppleClang XX.Y.Z, _LIBCPP_APPLE_CLANG_VER is XXYZ (e.g. AppleClang 14.0.3 => 1403)
 #  define _LIBCPP_COMPILER_CLANG_BASED
@@ -1230,21 +1219,6 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__
 #  endif
 
-#  if defined(_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES)
-#    define _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR
-#    define _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS
-#    define _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE
-#    define _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS
-#    define _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
-#  endif // _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES
-
-#  if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES)
-#    define _LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS
-#    define _LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS
-#    define _LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR
-#    define _LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS
-#  endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES
-
 // clang-format off
 #  define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh\")") _Pragma("push_macro(\"move\")") _Pragma("push_macro(\"erase\")")
 #  define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh\")") _Pragma("pop_macro(\"move\")") _Pragma("pop_macro(\"erase\")")
diff --git a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.compile.pass.cpp b/libcxx/test/libcxx/depr/enable_removed_cpp17_features.compile.pass.cpp
deleted file mode 100644
index 1b7acad3cfa46..0000000000000
--- a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.compile.pass.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// Test that defining _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES correctly defines
-// _LIBCPP_ENABLE_CXX17_REMOVED_FOO for each individual component macro.
-
-// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES -Wno-deprecated-pragma
-
-#include <__config>
-
-#include "test_macros.h"
-
-#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR
-#  error _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR must be defined
-#endif
-
-#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS
-#  error _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS must be defined
-#endif
-
-#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE
-#  error _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE must be defined
-#endif
-
-#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS
-#error _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS must be defined
-#endif
-
-#ifndef _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR
-#error _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR must be defined
-#endif
diff --git a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.deprecated.verify.cpp b/libcxx/test/libcxx/depr/enable_removed_cpp17_features.deprecated.verify.cpp
deleted file mode 100644
index 059c1b3ead4f1..0000000000000
--- a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.deprecated.verify.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <__config>
-
-// Ensure that defining _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES yields a
-// deprecation warning. We intend to issue a deprecation warning in LLVM 18
-// and remove the macro entirely in LLVM 19. As such, this test will be quite
-// short lived.
-
-// UNSUPPORTED: clang-modules-build
-
-// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES
-
-#include <__config> // expected-warning@* 1+ {{macro '_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES' has been marked as deprecated}}
diff --git a/libcxx/test/libcxx/depr/enable_removed_cpp20_features.deprecated.verify.cpp b/libcxx/test/libcxx/depr/enable_removed_cpp20_features.deprecated.verify.cpp
deleted file mode 100644
index 163ff7d8fbda0..0000000000000
--- a/libcxx/test/libcxx/depr/enable_removed_cpp20_features.deprecated.verify.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <__config>
-
-// Ensure that defining _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES yields a
-// deprecation warning. We intend to issue a deprecation warning in LLVM 18
-// and remove the macro entirely in LLVM 19. As such, this test will be quite
-// short lived.
-
-// UNSUPPORTED: clang-modules-build
-
-// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES
-
-#include <version> // expected-warning@* 1+ {{macro '_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES' has been marked as deprecated}}

From 388da6a31b7ba3062f9306b894656e265b9b33eb Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 16 Apr 2024 20:21:39 +0200
Subject: [PATCH 35/58] [libc++][test] Removes Clang 16 validation. (#88558)

---
 libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp
index 588ce2a3d17ed..614323b1ffd7b 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/asan.pass.cpp
@@ -29,8 +29,7 @@ void do_exit() {
 
 int main(int, char**)
 {
-#if TEST_STD_VER >= 11 && TEST_CLANG_VER >= 1600
-  // TODO(LLVM-18): Remove the special-casing
+#if TEST_STD_VER >= 11
   {
     typedef int T;
     typedef cpp17_input_iterator<T*> MyInputIter;
@@ -52,7 +51,7 @@ int main(int, char**)
     assert(v[1] == 'b');
     assert(is_contiguous_container_asan_correct(v));
   }
-#endif
+#endif // TEST_STD_VER >= 11
   {
     typedef cpp17_input_iterator<int*> MyInputIter;
     // Sould not trigger ASan.

From 8e0a4a89f940d17b520bbca040981f54195d3ea4 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 16 Apr 2024 20:22:48 +0200
Subject: [PATCH 36/58] [libc++][doc] Documents -DLIBCXX_INSTALL_MODULES=ON.
 (#88547)

Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
---
 libcxx/docs/BuildingLibcxx.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libcxx/docs/BuildingLibcxx.rst b/libcxx/docs/BuildingLibcxx.rst
index 28145ed1049e0..a0a0cdb433974 100644
--- a/libcxx/docs/BuildingLibcxx.rst
+++ b/libcxx/docs/BuildingLibcxx.rst
@@ -206,6 +206,12 @@ libc++ specific options
 
   Toggle the installation of the libc++ headers.
 
+.. option:: LIBCXX_INSTALL_MODULES:BOOL
+
+  **Default**: ``OFF``
+
+  Toggle the installation of the experimental libc++ module sources.
+
 .. option:: LIBCXX_ENABLE_SHARED:BOOL
 
   **Default**: ``ON``

From 002297bdaa63f3f9f56b0051110ccf48f31c6825 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Tue, 16 Apr 2024 14:28:58 -0400
Subject: [PATCH 37/58] [gn] port 22629bb22a1b

---
 llvm/utils/gn/secondary/libcxx/src/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn
index 1f6879358f22b..955854c7a134b 100644
--- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn
@@ -125,6 +125,7 @@ cxx_sources = [
   "condition_variable_destructor.cpp",
   "error_category.cpp",
   "exception.cpp",
+  "expected.cpp",
   "fstream.cpp",
   "functional.cpp",
   "future.cpp",

From 8debcf03c535e14ee47b14fddfcaeae3f32d1317 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming@google.com>
Date: Tue, 16 Apr 2024 11:31:09 -0700
Subject: [PATCH 38/58] [mlir][sparse] introduce sparse_tensor.iterate
 operation (#88807)

A `sparse_tensor.iterate` iterates over a sparse iteration space
extracted from `sparse_tensor.extract_iteration_space` operation
introduced in https://github.com/llvm/llvm-project/pull/88554.

*DO NOT MERGE* before https://github.com/llvm/llvm-project/pull/88554
---
 .../Dialect/SparseTensor/IR/SparseTensor.h    |  38 ++
 .../SparseTensor/IR/SparseTensorAttrDefs.td   |  15 +
 .../SparseTensor/IR/SparseTensorOps.td        | 152 +++++++-
 .../SparseTensor/IR/SparseTensorTypes.td      |  95 +++++
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 365 ++++++++++++++++++
 mlir/test/Dialect/SparseTensor/invalid.mlir   | 139 +++++++
 mlir/test/Dialect/SparseTensor/roundtrip.mlir |  53 +++
 .../SparseTensor/sparse_itertion_licm.mlir    |  26 ++
 8 files changed, 882 insertions(+), 1 deletion(-)
 create mode 100644 mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
index 5e523ec428aef..081a9b8cad8d6 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -17,9 +17,13 @@
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/TensorEncoding.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
+#include "llvm/ADT/bit.h"
+
 //===----------------------------------------------------------------------===//
 //
 // Type aliases to help code be more self-documenting. Unfortunately
@@ -41,6 +45,40 @@ using Level = uint64_t;
 /// including the value `ShapedType::kDynamic` (for shapes).
 using Size = int64_t;
 
+/// A simple wrapper to encode a bitset of (at most 64) defined levels.
+class LevelSet {
+  uint64_t bits = 0;
+
+public:
+  LevelSet() = default;
+  explicit LevelSet(uint64_t bits) : bits(bits) {}
+  operator uint64_t() const { return bits; }
+  /// Marks level `i` as a member of the set; `i` must be less than 64.
+  LevelSet &set(unsigned i) {
+    assert(i < 64);
+    bits |= uint64_t(1) << i; // Shift a 64-bit one; int `1 << i` breaks for i >= 31.
+    return *this;
+  }
+  /// Adds every level contained in `lhs` to this set.
+  LevelSet &operator|=(LevelSet lhs) {
+    bits |= static_cast<uint64_t>(lhs);
+    return *this;
+  }
+  /// Shifts the underlying bitset left by `offset` bits.
+  LevelSet &lshift(unsigned offset) {
+    bits = bits << offset;
+    return *this;
+  }
+  /// Returns true if level `i` is in the set; `i` must be less than 64.
+  bool operator[](unsigned i) const {
+    assert(i < 64);
+    return (bits & (uint64_t(1) << i)) != 0; // 64-bit mask, see set().
+  }
+  /// Returns the number of levels in the set.
+  unsigned count() const { return llvm::popcount(bits); }
+  bool empty() const { return bits == 0; }
+};
+
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index 4a9b9169ae4b8..d5398a98f5b17 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -19,6 +19,21 @@ class SparseTensor_Attr<string name,
                         list<Trait> traits = []>
     : AttrDef<SparseTensor_Dialect, name, traits>;
 
+//===----------------------------------------------------------------------===//
+// A simple bitset attribute wrapped over a single int64_t to encode a set of
+// sparse tensor levels.
+//===----------------------------------------------------------------------===//
+
+def LevelSetAttr :
+    TypedAttrBase<
+      I64, "IntegerAttr",
+      And<[CPred<"::llvm::isa<::mlir::IntegerAttr>($_self)">,
+           CPred<"::llvm::cast<::mlir::IntegerAttr>($_self).getType().isInteger(64)">]>,
+      "LevelSet attribute"> {
+  let returnType = [{::mlir::sparse_tensor::LevelSet}];
+  let convertFromStorage = [{::mlir::sparse_tensor::LevelSet($_self.getValue().getZExtValue())}];
+}
+
 //===----------------------------------------------------------------------===//
 // These attributes are just like `IndexAttr` except that they clarify whether
 // the index refers to a dimension (an axis of the semantic tensor) or a level
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index 0cfc64f9988a0..b43d716d5e864 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -15,6 +15,8 @@ include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td"
 include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/LoopLikeInterface.td"
 
 //===----------------------------------------------------------------------===//
 // Base class.
@@ -1277,7 +1279,7 @@ def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResu
 
 def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator,
     ParentOneOf<["BinaryOp", "UnaryOp", "ReduceOp", "SelectOp",
-                 "ForeachOp"]>]>,
+                 "ForeachOp", "IterateOp"]>]>,
     Arguments<(ins Variadic<AnyType>:$results)> {
   let summary = "Yield from sparse_tensor set-like operations";
   let description = [{
@@ -1430,6 +1432,154 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach",
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Iteration Operations.
+//===----------------------------------------------------------------------===//
+
+def ExtractIterSpaceOp : SparseTensor_Op<"extract_iteration_space",
+    [Pure, DeclareOpInterfaceMethods<InferTypeOpInterface>]> {
+
+  let arguments = (ins AnySparseTensor:$tensor,
+                       Optional<AnySparseIterator>:$parentIter,
+                       LevelAttr:$loLvl, LevelAttr:$hiLvl);
+
+  let results = (outs AnySparseIterSpace:$resultSpace);
+
+  let summary = "Extract an iteration space from a sparse tensor between certain levels";
+  let description = [{
+      Extracts a `!sparse_tensor.iter_space` from a sparse tensor between
+      certain (consecutive) levels.
+
+      `tensor`: the input sparse tensor that defines the iteration space.
+      `parentIter`: the iterator for the previous level, at which the iteration space
+      at the current levels will be extracted.
+      `loLvl`, `hiLvl`: the level range between [loLvl, hiLvl) in the input tensor that
+      the returned iteration space covers. `hiLvl - loLvl` defines the dimension of the
+      iteration space.
+
+      Example:
+      ```mlir
+      // Extracts a 1-D iteration space from a COO tensor at level 1.
+      %space = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1
+        : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+      ```
+  }];
+
+
+  let extraClassDeclaration = [{
+    std::pair<Level, Level> getLvlRange() {
+      return std::make_pair(getLoLvl(), getHiLvl());
+    }
+    unsigned getSpaceDim() {
+      return getHiLvl() - getLoLvl();
+    }
+    ArrayRef<::mlir::sparse_tensor::LevelType> getSpaceLvlTypes() {
+      return getResultSpace().getType().getLvlTypes();
+    }
+  }];
+
+  let hasVerifier = 1;
+  let assemblyFormat = "$tensor (`at` $parentIter^)? `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) "
+                       " attr-dict `:` type($tensor) (`,` type($parentIter)^)?";
+}
+
+def IterateOp : SparseTensor_Op<"iterate",
+    [RecursiveMemoryEffects, RecursivelySpeculatable,
+     DeclareOpInterfaceMethods<LoopLikeOpInterface,
+      ["getInitsMutable", "getLoopResults", "getRegionIterArgs",
+       "getYieldedValuesMutable"]>,
+     DeclareOpInterfaceMethods<RegionBranchOpInterface,
+      ["getEntrySuccessorOperands"]>,
+     SingleBlockImplicitTerminator<"sparse_tensor::YieldOp">]> {
+
+  let arguments = (ins AnySparseIterSpace:$iterSpace,
+                       Variadic<AnyType>:$initArgs,
+                       LevelSetAttr:$crdUsedLvls);
+  let results = (outs Variadic<AnyType>:$results);
+  let regions = (region SizedRegion<1>:$region);
+
+  let summary = "Iterate over a sparse iteration space";
+  let description = [{
+      The `sparse_tensor.iterate` operation represents a loop over the
+      provided iteration space extracted from a specific sparse tensor.
+      The operation defines an SSA value for a sparse iterator that points
+      to the current stored element in the sparse tensor and SSA values
+      for coordinates of the stored element. The coordinates are always
+      converted to `index` type regardless of the underlying sparse tensor
+      storage. When coordinates are not used, the SSA values can be skipped
+      by `_` symbols, which usually leads to simpler generated code after
+      sparsification. For example:
+
+      ```mlir
+      // The coordinate for level 0 is not used when iterating over a 2-D
+      // iteration space.
+      sparse_tensor.iterate %iterator in %space at(_, %crd_1)
+        : !sparse_tensor.iter_space<#CSR, lvls = 0 to 2>
+      ```
+
+      `sparse_tensor.iterate` can also operate on loop-carried variables
+      and returns the final values after loop termination.
+      The initial values of the variables are passed as additional SSA operands
+      to the iterator SSA value and used coordinate SSA values mentioned
+      above. The operation region has an argument for the iterator, variadic
+      arguments for specified (used) coordinates, followed by one argument
+      for each loop-carried variable, representing the value of the variable
+      at the current iteration.
+      The body region must contain exactly one block that terminates with
+      `sparse_tensor.yield`.
+
+      `sparse_tensor.iterate` results hold the final values after the last
+      iteration. If the `sparse_tensor.iterate` defines any values, a yield
+      must be explicitly present.
+      The number and types of the `sparse_tensor.iterate` results must match
+      the initial values in the iter_args binding and the yield operands.
+
+
+      A nested `sparse_tensor.iterate` example that prints all the coordinates
+      stored in the sparse input:
+
+      ```mlir
+      func.func @nested_iterate(%sp : tensor<4x8xf32, #COO>) {
+        // Iterates over the first level of %sp
+        %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+        %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd0)
+            : !sparse_tensor.iter_space<#COO, lvls = 0 to 1>  {
+          // Iterates over the second level of %sp
+          %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1
+              : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0 to 1>
+          %r2 = sparse_tensor.iterate %it2 in %l2 at (%crd1)
+              : !sparse_tensor.iter_space<#COO, lvls = 1 to 2>  {
+             vector.print %crd0 : index
+             vector.print %crd1 : index
+          }
+        }
+      }
+
+      ```
+  }];
+
+  let extraClassDeclaration = [{
+    unsigned getSpaceDim() {
+      return getIterSpace().getType().getSpaceDim();
+    }
+    BlockArgument getIterator() {
+      return getRegion().getArguments().front();
+    }
+    Block::BlockArgListType getCrds() {
+      // The first block argument is iterator, the remaining arguments are
+      // referenced coordinates.
+      return getRegion().getArguments().slice(1, getCrdUsedLvls().count());
+    }
+    unsigned getNumRegionIterArgs() {
+      return getRegion().getArguments().size() - 1 - getCrdUsedLvls().count();
+    }
+  }];
+
+  let hasVerifier = 1;
+  let hasRegionVerifier = 1;
+  let hasCustomAssemblyFormat = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Sparse Tensor Debugging and Test-Only Operations.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
index 185cff46ae25d..264a0a5b3bee6 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
@@ -72,4 +72,99 @@ def SparseTensorStorageSpecifier
     : Type<CPred<"::llvm::isa<::mlir::sparse_tensor::StorageSpecifierType>($_self)">, "metadata",
           "::mlir::sparse_tensor::StorageSpecifierType">;
 
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Iteration Types.
+//===----------------------------------------------------------------------===//
+
+def SparseTensor_IterSpace : SparseTensor_Type<"IterSpace"> {
+  let mnemonic = "iter_space";
+
+  let description = [{
+    A sparse iteration space that represents an abstract N-D (sparse) iteration space
+    extracted from a sparse tensor.
+
+    Examples:
+
+    ```mlir
+    // An iteration space extracted from a CSR tensor between levels [0, 2).
+    !iter_space<#CSR, lvls = 0 to 2>
+    ```
+  }];
+
+  let parameters = (ins
+     SparseTensorEncodingAttr : $encoding,
+     "Level" : $loLvl,
+     "Level" : $hiLvl
+  );
+
+  let extraClassDeclaration = [{
+     /// The dimension of the iteration space.
+     unsigned getSpaceDim() const {
+       return getHiLvl() - getLoLvl();
+     }
+
+     /// Get the level types for the iteration space.
+     ArrayRef<LevelType> getLvlTypes() const {
+       return getEncoding().getLvlTypes().slice(getLoLvl(), getSpaceDim());
+     }
+
+     /// Whether the iteration space is unique (i.e., no duplicated coordinate).
+     bool isUnique() {
+       return !getLvlTypes().back().isa<LevelPropNonDefault::Nonunique>();
+     }
+
+     /// Get the corresponding iterator type.
+     ::mlir::sparse_tensor::IteratorType getIteratorType() const;
+  }];
+
+  let assemblyFormat="`<` $encoding `,` `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) `>`";
+}
+
+def SparseTensor_Iterator : SparseTensor_Type<"Iterator"> {
+  let mnemonic = "iterator";
+
+  let description = [{
+    An iterator that points to the current element in the corresponding iteration space.
+
+    Examples:
+
+    ```mlir
+    // An iterator that iterates over an iteration space of type `!iter_space<#CSR, lvls = 0 to 2>`
+    !iterator<#CSR, lvls = 0 to 2>
+    ```
+  }];
+
+  let parameters = (ins
+     SparseTensorEncodingAttr : $encoding,
+     "Level" : $loLvl,
+     "Level" : $hiLvl
+  );
+
+  let extraClassDeclaration = [{
+     /// Get the corresponding iteration space type.
+     ::mlir::sparse_tensor::IterSpaceType getIterSpaceType() const;
+
+     unsigned getSpaceDim() const { return getIterSpaceType().getSpaceDim(); }
+     ArrayRef<LevelType> getLvlTypes() const { return getIterSpaceType().getLvlTypes(); }
+     bool isUnique() { return getIterSpaceType().isUnique(); }
+  }];
+
+  let assemblyFormat="`<` $encoding `,` `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) `>`";
+}
+
+def IsSparseSparseIterSpaceTypePred
+    : CPred<"::llvm::isa<::mlir::sparse_tensor::IterSpaceType>($_self)">;
+
+def IsSparseSparseIteratorTypePred
+    : CPred<"::llvm::isa<::mlir::sparse_tensor::IteratorType>($_self)">;
+
+def AnySparseIterSpace
+    : Type<IsSparseSparseIterSpaceTypePred, "sparse iteration space",
+          "::mlir::sparse_tensor::IterSpaceType">;
+
+def AnySparseIterator
+    : Type<IsSparseSparseIteratorTypePred, "sparse iterator",
+          "::mlir::sparse_tensor::IteratorType">;
+
+
 #endif // SPARSETENSOR_TYPES
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index e9058394d33da..36908def09f40 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -30,6 +30,14 @@
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrEnums.cpp.inc"
 
+// Forward declarations: the following custom parse/print methods are
+// referenced by the generated code for SparseTensorTypes.td.
+static mlir::ParseResult parseLevelRange(mlir::AsmParser &,
+                                         mlir::sparse_tensor::Level &,
+                                         mlir::sparse_tensor::Level &);
+static void printLevelRange(mlir::AsmPrinter &, mlir::sparse_tensor::Level,
+                            mlir::sparse_tensor::Level);
+
 #define GET_TYPEDEF_CLASSES
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.cpp.inc"
 
@@ -1953,6 +1961,363 @@ LogicalResult SortOp::verify() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Iteration Operations.
+//===----------------------------------------------------------------------===//
+
+IterSpaceType IteratorType::getIterSpaceType() const {
+  return IterSpaceType::get(getContext(), getEncoding(), getLoLvl(),
+                            getHiLvl());
+}
+
+IteratorType IterSpaceType::getIteratorType() const {
+  return IteratorType::get(getContext(), getEncoding(), getLoLvl(), getHiLvl());
+}
+
+/// Parses a level range in the form "$lo `to` $hi", or simply "$lo" (which
+/// implies $hi = $lo + 1). Fails if the range is empty, i.e. $hi <= $lo.
+static ParseResult parseLevelRange(AsmParser &parser, Level &lvlLo,
+                                   Level &lvlHi) {
+  if (parser.parseInteger(lvlLo))
+    return failure();
+
+  if (succeeded(parser.parseOptionalKeyword("to"))) {
+    if (parser.parseInteger(lvlHi))
+      return failure();
+  } else {
+    lvlHi = lvlLo + 1;
+  }
+  // Return the diagnostic so an empty range is rejected, not just reported.
+  if (lvlHi <= lvlLo)
+    return parser.emitError(parser.getNameLoc(),
+                            "expect larger level upper bound than lower bound");
+
+  return success();
+}
+
+/// Parses a level range in the form "$lo `to` $hi"
+/// or simply "$lo" if $hi - $lo = 1
+static ParseResult parseLevelRange(OpAsmParser &parser, IntegerAttr &lvlLoAttr,
+                                   IntegerAttr &lvlHiAttr) {
+  Level lvlLo, lvlHi;
+  if (parseLevelRange(parser, lvlLo, lvlHi))
+    return failure();
+
+  lvlLoAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlLo);
+  lvlHiAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlHi);
+  return success();
+}
+
+/// Prints a level range in the form "$lo `to` $hi"
+/// or simply "$lo" if $hi - $lo = 1
+static void printLevelRange(AsmPrinter &p, Level lo, Level hi) {
+
+  if (lo + 1 == hi)
+    p << lo;
+  else
+    p << lo << " to " << hi;
+}
+
+/// Op-level printer: unwraps the IntegerAttr bounds and prints them as
+/// "$lo `to` $hi", or simply "$lo" if $hi - $lo = 1.
+static void printLevelRange(OpAsmPrinter &p, Operation *, IntegerAttr lvlLo,
+                            IntegerAttr lvlHi) {
+  Level lo = lvlLo.getValue().getZExtValue();
+  Level hi = lvlHi.getValue().getZExtValue();
+  printLevelRange(p, lo, hi);
+}
+
+static ParseResult
+parseSparseSpaceLoop(OpAsmParser &parser, OperationState &state,
+                     SmallVectorImpl<OpAsmParser::Argument> &iterators,
+                     SmallVectorImpl<OpAsmParser::Argument> &iterArgs) {
+  SmallVector<OpAsmParser::UnresolvedOperand> spaces;
+  SmallVector<OpAsmParser::UnresolvedOperand> initArgs;
+
+  // Parses "%iters, ... in %spaces, ..."
+  if (parser.parseArgumentList(iterators) || parser.parseKeyword("in") ||
+      parser.parseOperandList(spaces))
+    return failure();
+
+  if (iterators.size() != spaces.size())
+    return parser.emitError(
+        parser.getNameLoc(),
+        "mismatch in number of sparse iterators and sparse spaces");
+
+  // Parse "at(%crd0, _, ...)"
+  LevelSet crdUsedLvlSet;
+  bool hasUsedCrds = succeeded(parser.parseOptionalKeyword("at"));
+  unsigned lvlCrdCnt = 0;
+  if (hasUsedCrds) {
+    ParseResult crdList = parser.parseCommaSeparatedList(
+        OpAsmParser::Delimiter::Paren, [&]() -> ParseResult {
+          if (parser.parseOptionalKeyword("_")) {
+            if (parser.parseArgument(iterArgs.emplace_back()))
+              return failure();
+            // Always use IndexType for the coordinate.
+            crdUsedLvlSet.set(lvlCrdCnt);
+            iterArgs.back().type = parser.getBuilder().getIndexType();
+          }
+          lvlCrdCnt += 1;
+          return success();
+        });
+    if (failed(crdList)) {
+      return parser.emitError(
+          parser.getNameLoc(),
+          "expecting SSA value or \"_\" for level coordinates");
+    }
+  }
+  // Set the CrdUsedLvl bitset.
+  state.addAttribute("crdUsedLvls",
+                     parser.getBuilder().getI64IntegerAttr(crdUsedLvlSet));
+
+  // Parse "iter_args(%arg = %init, ...)"
+  bool hasIterArgs = succeeded(parser.parseOptionalKeyword("iter_args"));
+  if (hasIterArgs)
+    if (parser.parseAssignmentList(iterArgs, initArgs))
+      return failure();
+
+  SmallVector<Type> iterSpaceTps;
+  // parse ": sparse_tensor.iter_space -> ret"
+  if (parser.parseColon() || parser.parseTypeList(iterSpaceTps))
+    return failure();
+  if (iterSpaceTps.size() != spaces.size())
+    return parser.emitError(parser.getNameLoc(),
+                            "mismatch in number of iteration space operands "
+                            "and iteration space types");
+
+  for (auto [it, tp] : llvm::zip_equal(iterators, iterSpaceTps)) {
+    IterSpaceType spaceTp = llvm::dyn_cast<IterSpaceType>(tp);
+    if (!spaceTp)
+      return parser.emitError(parser.getNameLoc(),
+                              "expected sparse_tensor.iter_space type for "
+                              "iteration space operands");
+    if (hasUsedCrds && spaceTp.getSpaceDim() != lvlCrdCnt)
+      return parser.emitError(parser.getNameLoc(),
+                              "mismatch in number of iteration space dimension "
+                              "and specified coordinates");
+    it.type = spaceTp.getIteratorType();
+  }
+
+  if (hasIterArgs)
+    if (parser.parseArrowTypeList(state.types))
+      return failure();
+
+  // Resolves input operands.
+  if (parser.resolveOperands(spaces, iterSpaceTps, parser.getNameLoc(),
+                             state.operands))
+    return failure();
+
+  if (hasIterArgs) {
+    unsigned numCrds = crdUsedLvlSet.count();
+    // Strip off leading args that are used for coordinates.
+    MutableArrayRef args = MutableArrayRef(iterArgs).drop_front(numCrds);
+    if (args.size() != initArgs.size() || args.size() != state.types.size()) {
+      return parser.emitError(
+          parser.getNameLoc(),
+          "mismatch in number of iteration arguments and return values");
+    }
+
+    for (auto [it, init, tp] : llvm::zip_equal(args, initArgs, state.types)) {
+      it.type = tp;
+      if (parser.resolveOperand(init, tp, state.operands))
+        return failure();
+    }
+  }
+  return success();
+}
+
+LogicalResult ExtractIterSpaceOp::inferReturnTypes(
+    MLIRContext *ctx, std::optional<Location> loc, ValueRange ops,
+    DictionaryAttr attr, OpaqueProperties prop, RegionRange region,
+    SmallVectorImpl<mlir::Type> &ret) {
+
+  ExtractIterSpaceOp::Adaptor adaptor(ops, attr, prop, region);
+  SparseTensorType stt = getSparseTensorType(adaptor.getTensor());
+  ret.push_back(IterSpaceType::get(ctx, stt.getEncoding(), adaptor.getLoLvl(),
+                                   adaptor.getHiLvl()));
+  return success();
+}
+
+LogicalResult ExtractIterSpaceOp::verify() {
+  if (getLoLvl() >= getHiLvl())
+    return emitOpError("expected smaller level low than level high");
+
+  TypedValue<IteratorType> pIter = getParentIter();
+  if ((pIter && getLoLvl() == 0) || (!pIter && getLoLvl() != 0)) {
+    return emitOpError(
+        "parent iterator should be specified iff level lower bound equals 0");
+  }
+
+  if (pIter) {
+    IterSpaceType spaceTp = getResultSpace().getType();
+    if (pIter.getType().getEncoding() != spaceTp.getEncoding())
+      return emitOpError(
+          "mismatch in parent iterator encoding and iteration space encoding.");
+
+    if (spaceTp.getLoLvl() != pIter.getType().getHiLvl())
+      return emitOpError("parent iterator should be used to extract an "
+                         "iteration space from a consecutive level.");
+  }
+
+  return success();
+}
+
+/// Parses the custom assembly form of `sparse_tensor.iterate`.
+ParseResult IterateOp::parse(OpAsmParser &parser, OperationState &result) {
+  // `iters`: iterator args; `iterArgs`: used coordinates, then iter_args.
+  SmallVector<OpAsmParser::Argument> iters, iterArgs;
+  if (parseSparseSpaceLoop(parser, result, iters, iterArgs))
+    return failure();
+  if (iters.size() != 1)
+    return parser.emitError(parser.getNameLoc(),
+                            "expected only one iterator/iteration space");
+
+  // Region arguments are ordered: iterator, coordinates, iter_args.
+  iters.append(iterArgs);
+  Region *body = result.addRegion();
+  if (parser.parseRegion(*body, iters))
+    return failure();
+
+  IterateOp::ensureTerminator(*body, parser.getBuilder(), result.location);
+
+  // Parse the optional attribute list.
+  if (parser.parseOptionalAttrDict(result.attributes))
+    return failure();
+
+  return success();
+}
+
+/// Prints the initialization list in the form of
+///   <prefix>(%inner = %outer, %inner2 = %outer2, <...>)
+/// where 'inner' values are assumed to be region arguments and 'outer' values
+/// are regular SSA values.
+static void printInitializationList(OpAsmPrinter &p,
+                                    Block::BlockArgListType blocksArgs,
+                                    ValueRange initializers,
+                                    StringRef prefix = "") {
+  assert(blocksArgs.size() == initializers.size() &&
+         "expected same length of arguments and initializers");
+  if (initializers.empty())
+    return;
+
+  p << prefix << '(';
+  llvm::interleaveComma(llvm::zip(blocksArgs, initializers), p, [&](auto it) {
+    p << std::get<0>(it) << " = " << std::get<1>(it);
+  });
+  p << ")";
+}
+
+static void printUsedCrdsList(OpAsmPrinter &p, unsigned spaceDim,
+                              Block::BlockArgListType blocksArgs,
+                              LevelSet crdUsedLvls) {
+  if (crdUsedLvls.empty())
+    return;
+
+  p << " at(";
+  for (unsigned i = 0; i < spaceDim; i++) {
+    if (crdUsedLvls[i]) {
+      p << blocksArgs.front();
+      blocksArgs = blocksArgs.drop_front();
+    } else {
+      p << "_";
+    }
+    if (i != spaceDim - 1)
+      p << ", ";
+  }
+  assert(blocksArgs.empty());
+  p << ")";
+}
+
+void IterateOp::print(OpAsmPrinter &p) {
+  p << " " << getIterator() << " in " << getIterSpace();
+  printUsedCrdsList(p, getSpaceDim(), getCrds(), getCrdUsedLvls());
+  printInitializationList(p, getRegionIterArgs(), getInitArgs(), " iter_args");
+
+  p << " : " << getIterSpace().getType() << " ";
+  if (!getInitArgs().empty())
+    p << "-> (" << getInitArgs().getTypes() << ") ";
+
+  p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
+                /*printBlockTerminators=*/!getInitArgs().empty());
+}
+
+LogicalResult IterateOp::verify() {
+  if (getInitArgs().size() != getNumResults()) {
+    return emitOpError(
+        "mismatch in number of loop-carried values and defined values");
+  }
+  return success();
+}
+
+LogicalResult IterateOp::verifyRegions() {
+  if (getIterator().getType() != getIterSpace().getType().getIteratorType())
+    return emitOpError("mismatch in iterator and iteration space type");
+  if (getNumRegionIterArgs() != getNumResults())
+    return emitOpError(
+        "mismatch in number of basic block args and defined values");
+
+  auto initArgs = getInitArgs();
+  auto iterArgs = getRegionIterArgs();
+  auto yieldVals = getYieldedValues();
+  auto opResults = getResults();
+  if (!llvm::all_equal({initArgs.size(), iterArgs.size(), yieldVals.size(),
+                        opResults.size()})) {
+    return emitOpError() << "number mismatch between iter args and results.";
+  }
+
+  for (auto [i, init, iter, yield, ret] :
+       llvm::enumerate(initArgs, iterArgs, yieldVals, opResults)) {
+    if (init.getType() != ret.getType())
+      return emitOpError() << "types mismatch between " << i
+                           << "th iter operand and defined value";
+    if (iter.getType() != ret.getType())
+      return emitOpError() << "types mismatch between " << i
+                           << "th iter region arg and defined value";
+    if (yield.getType() != ret.getType())
+      return emitOpError() << "types mismatch between " << i
+                           << "th yield value and defined value";
+  }
+
+  return success();
+}
+
+/// IterateOp implemented OpInterfaces' methods.
+SmallVector<Region *> IterateOp::getLoopRegions() { return {&getRegion()}; }
+
+MutableArrayRef<OpOperand> IterateOp::getInitsMutable() {
+  return getInitArgsMutable();
+}
+
+Block::BlockArgListType IterateOp::getRegionIterArgs() {
+  return getRegion().getArguments().take_back(getNumRegionIterArgs());
+}
+
+std::optional<MutableArrayRef<OpOperand>> IterateOp::getYieldedValuesMutable() {
+  return cast<sparse_tensor::YieldOp>(
+             getRegion().getBlocks().front().getTerminator())
+      .getResultsMutable();
+}
+
+std::optional<ResultRange> IterateOp::getLoopResults() { return getResults(); }
+
+OperandRange IterateOp::getEntrySuccessorOperands(RegionBranchPoint point) {
+  return getInitArgs();
+}
+
+void IterateOp::getSuccessorRegions(RegionBranchPoint point,
+                                    SmallVectorImpl<RegionSuccessor> &regions) {
+  // Both the operation itself and the region may be branching into the body or
+  // back into the operation itself.
+  regions.push_back(RegionSuccessor(&getRegion(), getRegionIterArgs()));
+  // It is possible for loop not to enter the body.
+  regions.push_back(RegionSuccessor(getResults()));
+}
+
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Dialect Setups.
+//===----------------------------------------------------------------------===//
+
 /// Materialize a single constant operation from a given attribute value with
 /// the desired resultant type.
 Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder,
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index 7f5c05190fc9a..b13024cd4ed99 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -1012,3 +1012,142 @@ func.func @sparse_print(%arg0: tensor<10x10xf64>) {
   sparse_tensor.print %arg0 : tensor<10x10xf64>
   return
 }
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 2>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' expect larger level upper bound than lower bound}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 to 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 2>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 1 : tensor<4x8xf32, #COO>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+#CSR = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : dense,
+    j : compressed
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#CSR, lvls = 0>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op mismatch in parent iterator encoding and iteration space encoding.}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#CSR, lvls = 0>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be used to extract an iteration space from a consecutive level.}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+  return
+}
+
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+  // expected-error @+1 {{'sparse_tensor.iterate' op different number of region iter_args and yielded values: 2 != 1}}
+  %r1, %r2 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%si = %i, %sj = %j): !sparse_tensor.iter_space<#COO, lvls = 0> -> (index, index) {
+    sparse_tensor.yield %si : index
+  }
+  return %r1 : index
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+// expected-note@+1 {{prior use here}}
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index) -> f32 {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+  // expected-error @+1 {{use of value '%i' expects different type than prior uses: 'f32' vs 'index'}}
+  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0> -> f32 {
+    sparse_tensor.yield %outer : f32
+  }
+  return %r1 : f32
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+  // expected-error @+1 {{'sparse_tensor.iterate' op 0-th region iter_arg and 0-th yielded value have different type: 'index' != 'f32'}}
+  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%si = %i): !sparse_tensor.iter_space<#COO, lvls = 0> -> index {
+    %y = arith.constant 1.0 :  f32
+    sparse_tensor.yield %y : f32
+  }
+  return %r1 : index
+}
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index 12f69c1d37b9c..e9a898f16b41d 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -738,3 +738,56 @@ func.func @sparse_has_runtime() -> i1 {
   %has_runtime = sparse_tensor.has_runtime_library
   return %has_runtime : i1
 }
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+// CHECK-LABEL:   func.func @sparse_extract_iter_space(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<4x8xf32, #sparse{{[0-9]*}}>,
+// CHECK-SAME:      %[[VAL_1:.*]]: !sparse_tensor.iterator<#sparse{{[0-9]*}}, lvls = 0>)
+// CHECK:           %[[VAL_2:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0
+// CHECK:           %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] at %[[VAL_1]] lvls = 1
+// CHECK:           return %[[VAL_2]], %[[VAL_3]] : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0>, !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 1>
+// CHECK:         }
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>)
+  -> (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) {
+  // Extracting the iteration space for the first level needs no parent iterator.
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+  // Extracting the iteration space for the second level needs a parent iterator.
+  %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+  return %l1, %l2 : !sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>
+}
+
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+// CHECK-LABEL:   func.func @sparse_iterate(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<4x8xf32, #sparse{{[0-9]*}}>,
+// CHECK-SAME:      %[[VAL_1:.*]]: index,
+// CHECK-SAME:      %[[VAL_2:.*]]: index) -> index {
+// CHECK:           %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0 : tensor<4x8xf32, #sparse{{[0-9]*}}>
+// CHECK:           %[[VAL_4:.*]] = sparse_tensor.iterate %[[VAL_5:.*]] in %[[VAL_3]] at(%[[VAL_6:.*]]) iter_args(%[[VAL_7:.*]] = %[[VAL_1]]) : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0> -> (index) {
+// CHECK:             sparse_tensor.yield %[[VAL_7]] : index
+// CHECK:           }
+// CHECK:           return %[[VAL_4]] : index
+// CHECK:         }
+func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0 to 1> -> index {
+    sparse_tensor.yield %outer : index
+  }
+  return %r1 : index
+}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir b/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir
new file mode 100644
index 0000000000000..e7158d04b37fe
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir
@@ -0,0 +1,26 @@
+// RUN: mlir-opt %s --loop-invariant-code-motion | FileCheck %s
+
+#CSR = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : dense,
+    j : compressed
+  )
+}>
+
+// Make sure that pure instructions are hoisted outside the loop.
+//
+// CHECK: sparse_tensor.values
+// CHECK: sparse_tensor.positions
+// CHECK: sparse_tensor.coordinate
+// CHECK: sparse_tensor.iterate
+func.func @sparse_iterate(%sp : tensor<?x?xf64, #CSR>) {
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<?x?xf64, #CSR>
+  sparse_tensor.iterate %it1 in %l1 at (%crd) : !sparse_tensor.iter_space<#CSR, lvls = 0> {
+    %0 = sparse_tensor.values %sp : tensor<?x?xf64, #CSR> to memref<?xf64>
+    %1 = sparse_tensor.positions %sp { level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
+    %2 = sparse_tensor.coordinates  %sp { level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
+    "test.op"(%0, %1, %2) : (memref<?xf64>, memref<?xindex>, memref<?xindex>) -> ()
+  }
+
+  return
+}

From b9556532c7391a2acb77ab0f7d7b36e1dc382b24 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming@google.com>
Date: Tue, 16 Apr 2024 11:31:33 -0700
Subject: [PATCH 39/58] Revert "[mlir][sparse] introduce sparse_tensor.iterate
 operation" (#88953)

Reverts llvm/llvm-project#88807 (merged by mistake)
---
 .../Dialect/SparseTensor/IR/SparseTensor.h    |  38 --
 .../SparseTensor/IR/SparseTensorAttrDefs.td   |  15 -
 .../SparseTensor/IR/SparseTensorOps.td        | 152 +-------
 .../SparseTensor/IR/SparseTensorTypes.td      |  95 -----
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 365 ------------------
 mlir/test/Dialect/SparseTensor/invalid.mlir   | 139 -------
 mlir/test/Dialect/SparseTensor/roundtrip.mlir |  53 ---
 .../SparseTensor/sparse_itertion_licm.mlir    |  26 --
 8 files changed, 1 insertion(+), 882 deletions(-)
 delete mode 100644 mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
index 081a9b8cad8d6..5e523ec428aef 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -17,13 +17,9 @@
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/TensorEncoding.h"
-#include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
-#include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 
-#include "llvm/ADT/bit.h"
-
 //===----------------------------------------------------------------------===//
 //
 // Type aliases to help code be more self-documenting. Unfortunately
@@ -45,40 +41,6 @@ using Level = uint64_t;
 /// including the value `ShapedType::kDynamic` (for shapes).
 using Size = int64_t;
 
-/// A simple wrapper to encode a bitset of defined  (at most 64) levels.
-class LevelSet {
-  uint64_t bits = 0;
-
-public:
-  LevelSet() = default;
-  explicit LevelSet(uint64_t bits) : bits(bits) {}
-  operator uint64_t() const { return bits; }
-
-  LevelSet &set(unsigned i) {
-    assert(i < 64);
-    bits |= 1 << i;
-    return *this;
-  }
-
-  LevelSet &operator|=(LevelSet lhs) {
-    bits |= static_cast<uint64_t>(lhs);
-    return *this;
-  }
-
-  LevelSet &lshift(unsigned offset) {
-    bits = bits << offset;
-    return *this;
-  }
-
-  bool operator[](unsigned i) const {
-    assert(i < 64);
-    return (bits & (1 << i)) != 0;
-  }
-
-  unsigned count() const { return llvm::popcount(bits); }
-  bool empty() const { return bits == 0; }
-};
-
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index d5398a98f5b17..4a9b9169ae4b8 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -19,21 +19,6 @@ class SparseTensor_Attr<string name,
                         list<Trait> traits = []>
     : AttrDef<SparseTensor_Dialect, name, traits>;
 
-//===----------------------------------------------------------------------===//
-// A simple bitset attribute wrapped over a single int64_t to encode a set of
-// sparse tensor levels.
-//===----------------------------------------------------------------------===//
-
-def LevelSetAttr :
-    TypedAttrBase<
-      I64, "IntegerAttr",
-      And<[CPred<"::llvm::isa<::mlir::IntegerAttr>($_self)">,
-           CPred<"::llvm::cast<::mlir::IntegerAttr>($_self).getType().isInteger(64)">]>,
-      "LevelSet attribute"> {
-  let returnType = [{::mlir::sparse_tensor::LevelSet}];
-  let convertFromStorage = [{::mlir::sparse_tensor::LevelSet($_self.getValue().getZExtValue())}];
-}
-
 //===----------------------------------------------------------------------===//
 // These attributes are just like `IndexAttr` except that they clarify whether
 // the index refers to a dimension (an axis of the semantic tensor) or a level
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index b43d716d5e864..0cfc64f9988a0 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -15,8 +15,6 @@ include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td"
 include "mlir/Dialect/SparseTensor/IR/SparseTensorInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/ControlFlowInterfaces.td"
-include "mlir/Interfaces/LoopLikeInterface.td"
 
 //===----------------------------------------------------------------------===//
 // Base class.
@@ -1279,7 +1277,7 @@ def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResu
 
 def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator,
     ParentOneOf<["BinaryOp", "UnaryOp", "ReduceOp", "SelectOp",
-                 "ForeachOp", "IterateOp"]>]>,
+                 "ForeachOp"]>]>,
     Arguments<(ins Variadic<AnyType>:$results)> {
   let summary = "Yield from sparse_tensor set-like operations";
   let description = [{
@@ -1432,154 +1430,6 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach",
   let hasVerifier = 1;
 }
 
-//===----------------------------------------------------------------------===//
-// Sparse Tensor Iteration Operations.
-//===----------------------------------------------------------------------===//
-
-def ExtractIterSpaceOp : SparseTensor_Op<"extract_iteration_space",
-    [Pure, DeclareOpInterfaceMethods<InferTypeOpInterface>]> {
-
-  let arguments = (ins AnySparseTensor:$tensor,
-                       Optional<AnySparseIterator>:$parentIter,
-                       LevelAttr:$loLvl, LevelAttr:$hiLvl);
-
-  let results = (outs AnySparseIterSpace:$resultSpace);
-
-  let summary = "Extract an iteration space from a sparse tensor between certain levels";
-  let description = [{
-      Extracts a `!sparse_tensor.iter_space` from a sparse tensor between
-      certian (consecutive) levels.
-
-      `tensor`: the input sparse tensor that defines the iteration space.
-      `parentIter`: the iterator for the previous level, at which the iteration space
-      at the current levels will be extracted.
-      `loLvl`, `hiLvl`: the level range between [loLvl, hiLvl) in the input tensor that
-      the returned iteration space covers. `hiLvl - loLvl` defines the dimension of the
-      iteration space.
-
-      Example:
-      ```mlir
-      // Extracts a 1-D iteration space from a COO tensor at level 1.
-      %space = sparse_tensor.iteration.extract_space %sp at %it1 lvls = 1
-        : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
-      ```
-  }];
-
-
-  let extraClassDeclaration = [{
-    std::pair<Level, Level> getLvlRange() {
-      return std::make_pair(getLoLvl(), getHiLvl());
-    }
-    unsigned getSpaceDim() {
-      return getHiLvl() - getLoLvl();
-    }
-    ArrayRef<::mlir::sparse_tensor::LevelType> getSpaceLvlTypes() {
-      return getResultSpace().getType().getLvlTypes();
-    }
-  }];
-
-  let hasVerifier = 1;
-  let assemblyFormat = "$tensor (`at` $parentIter^)? `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) "
-                       " attr-dict `:` type($tensor) (`,` type($parentIter)^)?";
-}
-
-def IterateOp : SparseTensor_Op<"iterate",
-    [RecursiveMemoryEffects, RecursivelySpeculatable,
-     DeclareOpInterfaceMethods<LoopLikeOpInterface,
-      ["getInitsMutable", "getLoopResults", "getRegionIterArgs",
-       "getYieldedValuesMutable"]>,
-     DeclareOpInterfaceMethods<RegionBranchOpInterface,
-      ["getEntrySuccessorOperands"]>,
-     SingleBlockImplicitTerminator<"sparse_tensor::YieldOp">]> {
-
-  let arguments = (ins AnySparseIterSpace:$iterSpace,
-                       Variadic<AnyType>:$initArgs,
-                       LevelSetAttr:$crdUsedLvls);
-  let results = (outs Variadic<AnyType>:$results);
-  let regions = (region SizedRegion<1>:$region);
-
-  let summary = "Iterate over a sparse iteration space";
-  let description = [{
-      The `sparse_tensor.iterate` operations represents a loop over the
-      provided iteration space extracted from a specific sparse tensor.
-      The operation defines an SSA value for a sparse iterator that points
-      to the current stored element in the sparse tensor and SSA values
-      for coordinates of the stored element. The coordinates are always
-      converted to `index` type despite of the underlying sparse tensor
-      storage. When coordinates are not used, the SSA values can be skipped
-      by `_` symbols, which usually leads to simpler generated code after
-      sparsification. For example:
-
-      ```mlir
-      // The coordinate for level 0 is not used when iterating over a 2-D
-      // iteration space.
-      %sparse_tensor.iterate %iterator in %space at(_, %crd_1)
-        : !sparse_tensor.iter_space<#CSR, lvls = 0 to 2>
-      ```
-
-      `sparse_tensor.iterate` can also operate on loop-carried variables
-      and returns the final values after loop termination.
-      The initial values of the variables are passed as additional SSA operands
-      to the iterator SSA value and used coordinate SSA values mentioned
-      above. The operation region has an argument for the iterator, variadic
-      arguments for specified (used) coordiates and followed by one argument
-      for each loop-carried variable, representing the value of the variable
-      at the current iteration.
-      The body region must contain exactly one block that terminates with
-      `sparse_tensor.yield`.
-
-      `sparse_tensor.iterate` results hold the final values after the last
-      iteration. If the `sparse_tensor.iterate` defines any values, a yield
-      must be explicitly present.
-      The number and types of the `sparse_tensor.iterate` results must match
-      the initial values in the iter_args binding and the yield operands.
-
-
-      A nested `sparse_tensor.iterate` example that prints all the coordinates
-      stored in the sparse input:
-
-      ```mlir
-      func.func @nested_iterate(%sp : tensor<4x8xf32, #COO>) {
-        // Iterates over the first level of %sp
-        %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
-        %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd0)
-            : !sparse_tensor.iter_space<#COO, lvls = 0 to 1>  {
-          // Iterates over the second level of %sp
-          %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1
-              : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0 to 1>
-          %r2 = sparse_tensor.iterate %it2 in %l2 at (crd1)
-              : !sparse_tensor.iter_space<#COO, lvls = 1 to 2>  {
-             vector.print %crd0 : index
-             vector.print %crd1 : index
-          }
-        }
-      }
-
-      ```
-  }];
-
-  let extraClassDeclaration = [{
-    unsigned getSpaceDim() {
-      return getIterSpace().getType().getSpaceDim();
-    }
-    BlockArgument getIterator() {
-      return getRegion().getArguments().front();
-    }
-    Block::BlockArgListType getCrds() {
-      // The first block argument is iterator, the remaining arguments are
-      // referenced coordinates.
-      return getRegion().getArguments().slice(1, getCrdUsedLvls().count());
-    }
-    unsigned getNumRegionIterArgs() {
-      return getRegion().getArguments().size() - 1 - getCrdUsedLvls().count();
-    }
-  }];
-
-  let hasVerifier = 1;
-  let hasRegionVerifier = 1;
-  let hasCustomAssemblyFormat = 1;
-}
-
 //===----------------------------------------------------------------------===//
 // Sparse Tensor Debugging and Test-Only Operations.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
index 264a0a5b3bee6..185cff46ae25d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
@@ -72,99 +72,4 @@ def SparseTensorStorageSpecifier
     : Type<CPred<"::llvm::isa<::mlir::sparse_tensor::StorageSpecifierType>($_self)">, "metadata",
           "::mlir::sparse_tensor::StorageSpecifierType">;
 
-//===----------------------------------------------------------------------===//
-// Sparse Tensor Iteration Types.
-//===----------------------------------------------------------------------===//
-
-def SparseTensor_IterSpace : SparseTensor_Type<"IterSpace"> {
-  let mnemonic = "iter_space";
-
-  let description = [{
-    A sparse iteration space that represents an abstract N-D (sparse) iteration space
-    extracted from a sparse tensor.
-
-    Examples:
-
-    ```mlir
-    // An iteration space extracted from a CSR tensor between levels [0, 2).
-    !iter_space<#CSR, lvls = 0 to 2>
-    ```
-  }];
-
-  let parameters = (ins
-     SparseTensorEncodingAttr : $encoding,
-     "Level" : $loLvl,
-     "Level" : $hiLvl
-  );
-
-  let extraClassDeclaration = [{
-     /// The the dimension of the iteration space.
-     unsigned getSpaceDim() const {
-       return getHiLvl() - getLoLvl();
-     }
-
-     /// Get the level types for the iteration space.
-     ArrayRef<LevelType> getLvlTypes() const {
-       return getEncoding().getLvlTypes().slice(getLoLvl(), getSpaceDim());
-     }
-
-     /// Whether the iteration space is unique (i.e., no duplicated coordinate).
-     bool isUnique() {
-       return !getLvlTypes().back().isa<LevelPropNonDefault::Nonunique>();
-     }
-
-     /// Get the corresponding iterator type.
-     ::mlir::sparse_tensor::IteratorType getIteratorType() const;
-  }];
-
-  let assemblyFormat="`<` $encoding `,` `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) `>`";
-}
-
-def SparseTensor_Iterator : SparseTensor_Type<"Iterator"> {
-  let mnemonic = "iterator";
-
-  let description = [{
-    An iterator that points to the current element in the corresponding iteration space.
-
-    Examples:
-
-    ```mlir
-    // An iterator that iterates over a iteration space of type `!iter_space<#CSR, lvls = 0 to 2>`
-    !iterator<#CSR, lvls = 0 to 2>
-    ```
-  }];
-
-  let parameters = (ins
-     SparseTensorEncodingAttr : $encoding,
-     "Level" : $loLvl,
-     "Level" : $hiLvl
-  );
-
-  let extraClassDeclaration = [{
-     /// Get the corresponding iteration space type.
-     ::mlir::sparse_tensor::IterSpaceType getIterSpaceType() const;
-
-     unsigned getSpaceDim() const { return getIterSpaceType().getSpaceDim(); }
-     ArrayRef<LevelType> getLvlTypes() const { return getIterSpaceType().getLvlTypes(); }
-     bool isUnique() { return getIterSpaceType().isUnique(); }
-  }];
-
-  let assemblyFormat="`<` $encoding `,` `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) `>`";
-}
-
-def IsSparseSparseIterSpaceTypePred
-    : CPred<"::llvm::isa<::mlir::sparse_tensor::IterSpaceType>($_self)">;
-
-def IsSparseSparseIteratorTypePred
-    : CPred<"::llvm::isa<::mlir::sparse_tensor::IteratorType>($_self)">;
-
-def AnySparseIterSpace
-    : Type<IsSparseSparseIterSpaceTypePred, "sparse iteration space",
-          "::mlir::sparse_tensor::IterSpaceType">;
-
-def AnySparseIterator
-    : Type<IsSparseSparseIteratorTypePred, "sparse iterator",
-          "::mlir::sparse_tensor::IteratorType">;
-
-
 #endif // SPARSETENSOR_TYPES
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 36908def09f40..e9058394d33da 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -30,14 +30,6 @@
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrEnums.cpp.inc"
 
-// Forward declarations, following custom print/parsing methods are referenced
-// by the generated code for SparseTensorTypes.td.
-static mlir::ParseResult parseLevelRange(mlir::AsmParser &,
-                                         mlir::sparse_tensor::Level &,
-                                         mlir::sparse_tensor::Level &);
-static void printLevelRange(mlir::AsmPrinter &, mlir::sparse_tensor::Level,
-                            mlir::sparse_tensor::Level);
-
 #define GET_TYPEDEF_CLASSES
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.cpp.inc"
 
@@ -1961,363 +1953,6 @@ LogicalResult SortOp::verify() {
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// Sparse Tensor Iteration Operations.
-//===----------------------------------------------------------------------===//
-
-IterSpaceType IteratorType::getIterSpaceType() const {
-  return IterSpaceType::get(getContext(), getEncoding(), getLoLvl(),
-                            getHiLvl());
-}
-
-IteratorType IterSpaceType::getIteratorType() const {
-  return IteratorType::get(getContext(), getEncoding(), getLoLvl(), getHiLvl());
-}
-
-/// Parses a level range in the form "$lo `to` $hi"
-/// or simply "$lo" if $hi - $lo = 1
-static ParseResult parseLevelRange(AsmParser &parser, Level &lvlLo,
-                                   Level &lvlHi) {
-  if (parser.parseInteger(lvlLo))
-    return failure();
-
-  if (succeeded(parser.parseOptionalKeyword("to"))) {
-    if (parser.parseInteger(lvlHi))
-      return failure();
-  } else {
-    lvlHi = lvlLo + 1;
-  }
-
-  if (lvlHi <= lvlLo)
-    parser.emitError(parser.getNameLoc(),
-                     "expect larger level upper bound than lower bound");
-
-  return success();
-}
-
-/// Parses a level range in the form "$lo `to` $hi"
-/// or simply "$lo" if $hi - $lo = 1
-static ParseResult parseLevelRange(OpAsmParser &parser, IntegerAttr &lvlLoAttr,
-                                   IntegerAttr &lvlHiAttr) {
-  Level lvlLo, lvlHi;
-  if (parseLevelRange(parser, lvlLo, lvlHi))
-    return failure();
-
-  lvlLoAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlLo);
-  lvlHiAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlHi);
-  return success();
-}
-
-/// Prints a level range in the form "$lo `to` $hi"
-/// or simply "$lo" if $hi - $lo = 1
-static void printLevelRange(AsmPrinter &p, Level lo, Level hi) {
-
-  if (lo + 1 == hi)
-    p << lo;
-  else
-    p << lo << " to " << hi;
-}
-
-/// Prints a level range in the form "$lo `to` $hi"
-/// or simply "$lo" if $hi - $lo = 1
-static void printLevelRange(OpAsmPrinter &p, Operation *, IntegerAttr lvlLo,
-                            IntegerAttr lvlHi) {
-  unsigned lo = lvlLo.getValue().getZExtValue();
-  unsigned hi = lvlHi.getValue().getZExtValue();
-  printLevelRange(p, lo, hi);
-}
-
-static ParseResult
-parseSparseSpaceLoop(OpAsmParser &parser, OperationState &state,
-                     SmallVectorImpl<OpAsmParser::Argument> &iterators,
-                     SmallVectorImpl<OpAsmParser::Argument> &iterArgs) {
-  SmallVector<OpAsmParser::UnresolvedOperand> spaces;
-  SmallVector<OpAsmParser::UnresolvedOperand> initArgs;
-
-  // Parses "%iters, ... in %spaces, ..."
-  if (parser.parseArgumentList(iterators) || parser.parseKeyword("in") ||
-      parser.parseOperandList(spaces))
-    return failure();
-
-  if (iterators.size() != spaces.size())
-    return parser.emitError(
-        parser.getNameLoc(),
-        "mismatch in number of sparse iterators and sparse spaces");
-
-  // Parse "at(%crd0, _, ...)"
-  LevelSet crdUsedLvlSet;
-  bool hasUsedCrds = succeeded(parser.parseOptionalKeyword("at"));
-  unsigned lvlCrdCnt = 0;
-  if (hasUsedCrds) {
-    ParseResult crdList = parser.parseCommaSeparatedList(
-        OpAsmParser::Delimiter::Paren, [&]() -> ParseResult {
-          if (parser.parseOptionalKeyword("_")) {
-            if (parser.parseArgument(iterArgs.emplace_back()))
-              return failure();
-            // Always use IndexType for the coordinate.
-            crdUsedLvlSet.set(lvlCrdCnt);
-            iterArgs.back().type = parser.getBuilder().getIndexType();
-          }
-          lvlCrdCnt += 1;
-          return success();
-        });
-    if (failed(crdList)) {
-      return parser.emitError(
-          parser.getNameLoc(),
-          "expecting SSA value or \"_\" for level coordinates");
-    }
-  }
-  // Set the CrdUsedLvl bitset.
-  state.addAttribute("crdUsedLvls",
-                     parser.getBuilder().getI64IntegerAttr(crdUsedLvlSet));
-
-  // Parse "iter_args(%arg = %init, ...)"
-  bool hasIterArgs = succeeded(parser.parseOptionalKeyword("iter_args"));
-  if (hasIterArgs)
-    if (parser.parseAssignmentList(iterArgs, initArgs))
-      return failure();
-
-  SmallVector<Type> iterSpaceTps;
-  // parse ": sparse_tensor.iter_space -> ret"
-  if (parser.parseColon() || parser.parseTypeList(iterSpaceTps))
-    return failure();
-  if (iterSpaceTps.size() != spaces.size())
-    return parser.emitError(parser.getNameLoc(),
-                            "mismatch in number of iteration space operands "
-                            "and iteration space types");
-
-  for (auto [it, tp] : llvm::zip_equal(iterators, iterSpaceTps)) {
-    IterSpaceType spaceTp = llvm::dyn_cast<IterSpaceType>(tp);
-    if (!spaceTp)
-      return parser.emitError(parser.getNameLoc(),
-                              "expected sparse_tensor.iter_space type for "
-                              "iteration space operands");
-    if (hasUsedCrds && spaceTp.getSpaceDim() != lvlCrdCnt)
-      return parser.emitError(parser.getNameLoc(),
-                              "mismatch in number of iteration space dimension "
-                              "and specified coordinates");
-    it.type = spaceTp.getIteratorType();
-  }
-
-  if (hasIterArgs)
-    if (parser.parseArrowTypeList(state.types))
-      return failure();
-
-  // Resolves input operands.
-  if (parser.resolveOperands(spaces, iterSpaceTps, parser.getNameLoc(),
-                             state.operands))
-    return failure();
-
-  if (hasIterArgs) {
-    unsigned numCrds = crdUsedLvlSet.count();
-    // Strip off leading args that used for coordinates.
-    MutableArrayRef args = MutableArrayRef(iterArgs).drop_front(numCrds);
-    if (args.size() != initArgs.size() || args.size() != state.types.size()) {
-      return parser.emitError(
-          parser.getNameLoc(),
-          "mismatch in number of iteration arguments and return values");
-    }
-
-    for (auto [it, init, tp] : llvm::zip_equal(args, initArgs, state.types)) {
-      it.type = tp;
-      if (parser.resolveOperand(init, tp, state.operands))
-        return failure();
-    }
-  }
-  return success();
-}
-
-LogicalResult ExtractIterSpaceOp::inferReturnTypes(
-    MLIRContext *ctx, std::optional<Location> loc, ValueRange ops,
-    DictionaryAttr attr, OpaqueProperties prop, RegionRange region,
-    SmallVectorImpl<mlir::Type> &ret) {
-
-  ExtractIterSpaceOp::Adaptor adaptor(ops, attr, prop, region);
-  SparseTensorType stt = getSparseTensorType(adaptor.getTensor());
-  ret.push_back(IterSpaceType::get(ctx, stt.getEncoding(), adaptor.getLoLvl(),
-                                   adaptor.getHiLvl()));
-  return success();
-}
-
-LogicalResult ExtractIterSpaceOp::verify() {
-  if (getLoLvl() >= getHiLvl())
-    return emitOpError("expected smaller level low than level high");
-
-  TypedValue<IteratorType> pIter = getParentIter();
-  if ((pIter && getLoLvl() == 0) || (!pIter && getLoLvl() != 0)) {
-    return emitOpError(
-        "parent iterator should be specified iff level lower bound equals 0");
-  }
-
-  if (pIter) {
-    IterSpaceType spaceTp = getResultSpace().getType();
-    if (pIter.getType().getEncoding() != spaceTp.getEncoding())
-      return emitOpError(
-          "mismatch in parent iterator encoding and iteration space encoding.");
-
-    if (spaceTp.getLoLvl() != pIter.getType().getHiLvl())
-      return emitOpError("parent iterator should be used to extract an "
-                         "iteration space from a consecutive level.");
-  }
-
-  return success();
-}
-
-ParseResult IterateOp::parse(OpAsmParser &parser, OperationState &result) {
-  OpAsmParser::Argument iterator;
-  OpAsmParser::UnresolvedOperand iterSpace;
-
-  SmallVector<OpAsmParser::Argument> iters, iterArgs;
-  if (parseSparseSpaceLoop(parser, result, iters, iterArgs))
-    return failure();
-  if (iters.size() != 1)
-    return parser.emitError(parser.getNameLoc(),
-                            "expected only one iterator/iteration space");
-
-  iters.append(iterArgs);
-  Region *body = result.addRegion();
-  if (parser.parseRegion(*body, iters))
-    return failure();
-
-  IterateOp::ensureTerminator(*body, parser.getBuilder(), result.location);
-
-  // Parse the optional attribute list.
-  if (parser.parseOptionalAttrDict(result.attributes))
-    return failure();
-
-  return success();
-}
-
-/// Prints the initialization list in the form of
-///   <prefix>(%inner = %outer, %inner2 = %outer2, <...>)
-/// where 'inner' values are assumed to be region arguments and 'outer' values
-/// are regular SSA values.
-static void printInitializationList(OpAsmPrinter &p,
-                                    Block::BlockArgListType blocksArgs,
-                                    ValueRange initializers,
-                                    StringRef prefix = "") {
-  assert(blocksArgs.size() == initializers.size() &&
-         "expected same length of arguments and initializers");
-  if (initializers.empty())
-    return;
-
-  p << prefix << '(';
-  llvm::interleaveComma(llvm::zip(blocksArgs, initializers), p, [&](auto it) {
-    p << std::get<0>(it) << " = " << std::get<1>(it);
-  });
-  p << ")";
-}
-
-static void printUsedCrdsList(OpAsmPrinter &p, unsigned spaceDim,
-                              Block::BlockArgListType blocksArgs,
-                              LevelSet crdUsedLvls) {
-  if (crdUsedLvls.empty())
-    return;
-
-  p << " at(";
-  for (unsigned i = 0; i < spaceDim; i++) {
-    if (crdUsedLvls[i]) {
-      p << blocksArgs.front();
-      blocksArgs = blocksArgs.drop_front();
-    } else {
-      p << "_";
-    }
-    if (i != spaceDim - 1)
-      p << ", ";
-  }
-  assert(blocksArgs.empty());
-  p << ")";
-}
-
-void IterateOp::print(OpAsmPrinter &p) {
-  p << " " << getIterator() << " in " << getIterSpace();
-  printUsedCrdsList(p, getSpaceDim(), getCrds(), getCrdUsedLvls());
-  printInitializationList(p, getRegionIterArgs(), getInitArgs(), " iter_args");
-
-  p << " : " << getIterSpace().getType() << " ";
-  if (!getInitArgs().empty())
-    p << "-> (" << getInitArgs().getTypes() << ") ";
-
-  p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
-                /*printBlockTerminators=*/!getInitArgs().empty());
-}
-
-LogicalResult IterateOp::verify() {
-  if (getInitArgs().size() != getNumResults()) {
-    return emitOpError(
-        "mismatch in number of loop-carried values and defined values");
-  }
-  return success();
-}
-
-LogicalResult IterateOp::verifyRegions() {
-  if (getIterator().getType() != getIterSpace().getType().getIteratorType())
-    return emitOpError("mismatch in iterator and iteration space type");
-  if (getNumRegionIterArgs() != getNumResults())
-    return emitOpError(
-        "mismatch in number of basic block args and defined values");
-
-  auto initArgs = getInitArgs();
-  auto iterArgs = getRegionIterArgs();
-  auto yieldVals = getYieldedValues();
-  auto opResults = getResults();
-  if (!llvm::all_equal({initArgs.size(), iterArgs.size(), yieldVals.size(),
-                        opResults.size()})) {
-    return emitOpError() << "number mismatch between iter args and results.";
-  }
-
-  for (auto [i, init, iter, yield, ret] :
-       llvm::enumerate(initArgs, iterArgs, yieldVals, opResults)) {
-    if (init.getType() != ret.getType())
-      return emitOpError() << "types mismatch between " << i
-                           << "th iter operand and defined value";
-    if (iter.getType() != ret.getType())
-      return emitOpError() << "types mismatch between " << i
-                           << "th iter region arg and defined value";
-    if (yield.getType() != ret.getType())
-      return emitOpError() << "types mismatch between " << i
-                           << "th yield value and defined value";
-  }
-
-  return success();
-}
-
-/// IterateOp implemented OpInterfaces' methods.
-SmallVector<Region *> IterateOp::getLoopRegions() { return {&getRegion()}; }
-
-MutableArrayRef<OpOperand> IterateOp::getInitsMutable() {
-  return getInitArgsMutable();
-}
-
-Block::BlockArgListType IterateOp::getRegionIterArgs() {
-  return getRegion().getArguments().take_back(getNumRegionIterArgs());
-}
-
-std::optional<MutableArrayRef<OpOperand>> IterateOp::getYieldedValuesMutable() {
-  return cast<sparse_tensor::YieldOp>(
-             getRegion().getBlocks().front().getTerminator())
-      .getResultsMutable();
-}
-
-std::optional<ResultRange> IterateOp::getLoopResults() { return getResults(); }
-
-OperandRange IterateOp::getEntrySuccessorOperands(RegionBranchPoint point) {
-  return getInitArgs();
-}
-
-void IterateOp::getSuccessorRegions(RegionBranchPoint point,
-                                    SmallVectorImpl<RegionSuccessor> &regions) {
-  // Both the operation itself and the region may be branching into the body or
-  // back into the operation itself.
-  regions.push_back(RegionSuccessor(&getRegion(), getRegionIterArgs()));
-  // It is possible for loop not to enter the body.
-  regions.push_back(RegionSuccessor(getResults()));
-}
-
-//===----------------------------------------------------------------------===//
-// Sparse Tensor Dialect Setups.
-//===----------------------------------------------------------------------===//
-
 /// Materialize a single constant operation from a given attribute value with
 /// the desired resultant type.
 Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder,
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index b13024cd4ed99..7f5c05190fc9a 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -1012,142 +1012,3 @@ func.func @sparse_print(%arg0: tensor<10x10xf64>) {
   sparse_tensor.print %arg0 : tensor<10x10xf64>
   return
 }
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 2>) {
-  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' expect larger level upper bound than lower bound}}
-  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 to 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 2>
-  return
-}
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
-  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
-  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
-  return
-}
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>) {
-  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 1 : tensor<4x8xf32, #COO>
-  return
-}
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-#CSR = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : dense,
-    j : compressed
-  )
-}>
-
-func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#CSR, lvls = 0>) {
-  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op mismatch in parent iterator encoding and iteration space encoding.}}
-  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#CSR, lvls = 0>
-  return
-}
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
-  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be used to extract an iteration space from a consecutive level.}}
-  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
-  return
-}
-
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
-  // expected-error @+1 {{'sparse_tensor.iterate' op different number of region iter_args and yielded values: 2 != 1}}
-  %r1, %r2 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%si = %i, %sj = %j): !sparse_tensor.iter_space<#COO, lvls = 0> -> (index, index) {
-    sparse_tensor.yield %si : index
-  }
-  return %r1 : index
-}
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-// expected-note@+1 {{prior use here}}
-func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index) -> f32 {
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
-  // expected-error @+1 {{use of value '%i' expects different type than prior uses: 'f32' vs 'index'}}
-  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0> -> f32 {
-    sparse_tensor.yield %outer : f32
-  }
-  return %r1 : f32
-}
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
-  // expected-error @+1 {{'sparse_tensor.iterate' op 0-th region iter_arg and 0-th yielded value have different type: 'index' != 'f32'}}
-  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%si = %i): !sparse_tensor.iter_space<#COO, lvls = 0> -> index {
-    %y = arith.constant 1.0 :  f32
-    sparse_tensor.yield %y : f32
-  }
-  return %r1 : index
-}
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index e9a898f16b41d..12f69c1d37b9c 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -738,56 +738,3 @@ func.func @sparse_has_runtime() -> i1 {
   %has_runtime = sparse_tensor.has_runtime_library
   return %has_runtime : i1
 }
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-// CHECK-LABEL:   func.func @sparse_extract_iter_space(
-// CHECK-SAME:      %[[VAL_0:.*]]: tensor<4x8xf32, #sparse{{[0-9]*}}>,
-// CHECK-SAME:      %[[VAL_1:.*]]: !sparse_tensor.iterator<#sparse{{[0-9]*}}, lvls = 0>)
-// CHECK:           %[[VAL_2:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0
-// CHECK:           %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] at %[[VAL_1]] lvls = 1
-// CHECK:           return %[[VAL_2]], %[[VAL_3]] : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0>, !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 1>
-// CHECK:         }
-func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>)
-  -> (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) {
-  // Extracting the iteration space for the first level needs no parent iterator.
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
-  // Extracting the iteration space for the second level needs a parent iterator.
-  %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
-  return %l1, %l2 : !sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>
-}
-
-
-// -----
-
-#COO = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : compressed(nonunique),
-    j : singleton(soa)
-  )
-}>
-
-// CHECK-LABEL:   func.func @sparse_iterate(
-// CHECK-SAME:      %[[VAL_0:.*]]: tensor<4x8xf32, #sparse{{[0-9]*}}>,
-// CHECK-SAME:      %[[VAL_1:.*]]: index,
-// CHECK-SAME:      %[[VAL_2:.*]]: index) -> index {
-// CHECK:           %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0 : tensor<4x8xf32, #sparse{{[0-9]*}}>
-// CHECK:           %[[VAL_4:.*]] = sparse_tensor.iterate %[[VAL_5:.*]] in %[[VAL_3]] at(%[[VAL_6:.*]]) iter_args(%[[VAL_7:.*]] = %[[VAL_1]]) : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0> -> (index) {
-// CHECK:             sparse_tensor.yield %[[VAL_7]] : index
-// CHECK:           }
-// CHECK:           return %[[VAL_4]] : index
-// CHECK:         }
-func.func @sparse_iterate(%sp : tensor<4x8xf32, #COO>, %i : index, %j : index) -> index {
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
-  %r1 = sparse_tensor.iterate %it1 in %l1 at (%crd) iter_args(%outer = %i): !sparse_tensor.iter_space<#COO, lvls = 0 to 1> -> index {
-    sparse_tensor.yield %outer : index
-  }
-  return %r1 : index
-}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir b/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir
deleted file mode 100644
index e7158d04b37fe..0000000000000
--- a/mlir/test/Dialect/SparseTensor/sparse_itertion_licm.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: mlir-opt %s --loop-invariant-code-motion | FileCheck %s
-
-#CSR = #sparse_tensor.encoding<{
-  map = (i, j) -> (
-    i : dense,
-    j : compressed
-  )
-}>
-
-// Make sure that pure instructions are hoisted outside the loop.
-//
-// CHECK: sparse_tensor.values
-// CHECK: sparse_tensor.positions
-// CHECK: sparse_tensor.coordinate
-// CHECK: sparse_tensor.iterate
-func.func @sparse_iterate(%sp : tensor<?x?xf64, #CSR>) {
-  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<?x?xf64, #CSR>
-  sparse_tensor.iterate %it1 in %l1 at (%crd) : !sparse_tensor.iter_space<#CSR, lvls = 0> {
-    %0 = sparse_tensor.values %sp : tensor<?x?xf64, #CSR> to memref<?xf64>
-    %1 = sparse_tensor.positions %sp { level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
-    %2 = sparse_tensor.coordinates  %sp { level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
-    "test.op"(%0, %1, %2) : (memref<?xf64>, memref<?xindex>, memref<?xindex>) -> ()
-  }
-
-  return
-}

From 481bd5d416df7a1d24e18cc81ae782e8701de965 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming@google.com>
Date: Tue, 16 Apr 2024 11:32:30 -0700
Subject: [PATCH 40/58] [mlir][sparse] introduce
 `sparse_tensor.extract_iteration_space` operation. (#88554)

A `sparse_tensor.extract_iteration_space %tensor at %iterator` extracts a
*sparse* iteration space defined by `%tensor`; the operation to traverse the
iteration space will be introduced in following PRs.
---
 .../SparseTensor/IR/SparseTensorOps.td        |  60 ++++++++++
 .../SparseTensor/IR/SparseTensorTypes.td      |  97 +++++++++++++++
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 110 ++++++++++++++++++
 mlir/test/Dialect/SparseTensor/invalid.mlir   |  82 +++++++++++++
 mlir/test/Dialect/SparseTensor/roundtrip.mlir |  25 ++++
 5 files changed, 374 insertions(+)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index 0cfc64f9988a0..d7121e8320a4b 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -1430,6 +1430,66 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach",
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Iteration Operations.
+//===----------------------------------------------------------------------===//
+
+def ExtractIterSpaceOp : SparseTensor_Op<"extract_iteration_space",
+    [Pure, DeclareOpInterfaceMethods<InferTypeOpInterface>]> {
+
+  let arguments = (ins AnySparseTensor:$tensor,
+                       Optional<AnySparseIterator>:$parentIter,
+                       LevelAttr:$loLvl, LevelAttr:$hiLvl);
+
+  let results = (outs AnySparseIterSpace:$resultSpace);
+
+  let summary = "Extracts an iteration space from a sparse tensor between certain levels";
+  let description = [{
+      Extracts a `!sparse_tensor.iter_space` from a sparse tensor between
+      certain (consecutive) levels. For sparse levels, it is usually done by
+      loading a position range from the underlying sparse tensor storage.
+      E.g., for a compressed level, the iteration space is extracted by
+      [pos[i], pos[i+1]) supposing the parent iterator points at `i`.
+
+      `tensor`: the input sparse tensor that defines the iteration space.
+      `parentIter`: the iterator for the previous level, at which the iteration space
+      at the current levels will be extracted.
+      `loLvl`, `hiLvl`: the level range between [loLvl, hiLvl) in the input tensor that
+      the returned iteration space covers. `hiLvl - loLvl` defines the dimension of the
+      iteration space.
+
+      The type of the returned value is automatically inferred as
+      `!sparse_tensor.iter_space<#INPUT_ENCODING, lvls = $loLvl to $hiLvl>`.
+      The returned iteration space can then be iterated over by
+      `sparse_tensor.iterate` operations to visit every stored element
+      (usually nonzeros) in the input sparse tensor.
+
+      Example:
+      ```mlir
+      // Extracts a 1-D iteration space from a COO tensor at level 1.
+      %space = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1
+        : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+      ```
+  }];
+
+
+  let extraClassDeclaration = [{
+    std::pair<Level, Level> getLvlRange() {
+      return std::make_pair(getLoLvl(), getHiLvl());
+    }
+    unsigned getSpaceDim() {
+      return getHiLvl() - getLoLvl();
+    }
+    ArrayRef<::mlir::sparse_tensor::LevelType> getSpaceLvlTypes() {
+      return getResultSpace().getType().getLvlTypes();
+    }
+  }];
+
+  let hasVerifier = 1;
+  let assemblyFormat = "$tensor (`at` $parentIter^)? `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) "
+                       " attr-dict `:` type($tensor) (`,` type($parentIter)^)?";
+}
+
 //===----------------------------------------------------------------------===//
 // Sparse Tensor Debugging and Test-Only Operations.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
index 185cff46ae25d..79113d8778743 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorTypes.td
@@ -72,4 +72,101 @@ def SparseTensorStorageSpecifier
     : Type<CPred<"::llvm::isa<::mlir::sparse_tensor::StorageSpecifierType>($_self)">, "metadata",
           "::mlir::sparse_tensor::StorageSpecifierType">;
 
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Iteration Types.
+//===----------------------------------------------------------------------===//
+
+def SparseTensor_IterSpace : SparseTensor_Type<"IterSpace"> {
+  let mnemonic = "iter_space";
+
+  let description = [{
+    A sparse iteration space that represents an abstract N-D (sparse) iteration space
+    extracted from a sparse tensor, i.e., a set of (crd_0, crd_1, ..., crd_N) for
+    every stored element (usually nonzeros) in a sparse tensor between the specified
+    [$loLvl, $hiLvl) levels.
+
+    Examples:
+
+    ```mlir
+    // An iteration space extracted from a CSR tensor between levels [0, 2).
+    !iter_space<#CSR, lvls = 0 to 2>
+    ```
+  }];
+
+  let parameters = (ins
+     SparseTensorEncodingAttr : $encoding,
+     "Level" : $loLvl,
+     "Level" : $hiLvl
+  );
+
+  let extraClassDeclaration = [{
+     /// The dimension of the iteration space.
+     unsigned getSpaceDim() const {
+       return getHiLvl() - getLoLvl();
+     }
+
+     /// Get the level types for the iteration space.
+     ArrayRef<LevelType> getLvlTypes() const {
+       return getEncoding().getLvlTypes().slice(getLoLvl(), getSpaceDim());
+     }
+
+     /// Whether the iteration space is unique (i.e., no duplicated coordinate).
+     bool isUnique() {
+       return !getLvlTypes().back().isa<LevelPropNonDefault::Nonunique>();
+     }
+
+     /// Get the corresponding iterator type.
+     ::mlir::sparse_tensor::IteratorType getIteratorType() const;
+  }];
+
+  let assemblyFormat="`<` $encoding `,` `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) `>`";
+}
+
+def SparseTensor_Iterator : SparseTensor_Type<"Iterator"> {
+  let mnemonic = "iterator";
+
+  let description = [{
+    An iterator that points to the current element in the corresponding iteration space.
+
+    Examples:
+
+    ```mlir
+    // An iterator that iterates over an iteration space of type `!iter_space<#CSR, lvls = 0 to 2>`
+    !iterator<#CSR, lvls = 0 to 2>
+    ```
+  }];
+
+  let parameters = (ins
+     SparseTensorEncodingAttr : $encoding,
+     "Level" : $loLvl,
+     "Level" : $hiLvl
+  );
+
+  let extraClassDeclaration = [{
+     /// Get the corresponding iteration space type.
+     ::mlir::sparse_tensor::IterSpaceType getIterSpaceType() const;
+
+     unsigned getSpaceDim() const { return getIterSpaceType().getSpaceDim(); }
+     ArrayRef<LevelType> getLvlTypes() const { return getIterSpaceType().getLvlTypes(); }
+     bool isUnique() { return getIterSpaceType().isUnique(); }
+  }];
+
+  let assemblyFormat="`<` $encoding `,` `lvls` `=` custom<LevelRange>($loLvl, $hiLvl) `>`";
+}
+
+def IsSparseSparseIterSpaceTypePred
+    : CPred<"::llvm::isa<::mlir::sparse_tensor::IterSpaceType>($_self)">;
+
+def IsSparseSparseIteratorTypePred
+    : CPred<"::llvm::isa<::mlir::sparse_tensor::IteratorType>($_self)">;
+
+def AnySparseIterSpace
+    : Type<IsSparseSparseIterSpaceTypePred, "sparse iteration space",
+          "::mlir::sparse_tensor::IterSpaceType">;
+
+def AnySparseIterator
+    : Type<IsSparseSparseIteratorTypePred, "sparse iterator",
+          "::mlir::sparse_tensor::IteratorType">;
+
+
 #endif // SPARSETENSOR_TYPES
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index e9058394d33da..516b0943bdcfa 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -30,6 +30,14 @@
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrEnums.cpp.inc"
 
+// Forward declarations: the following custom print/parse methods are referenced
+// by the generated code for SparseTensorTypes.td.
+static mlir::ParseResult parseLevelRange(mlir::AsmParser &,
+                                         mlir::sparse_tensor::Level &,
+                                         mlir::sparse_tensor::Level &);
+static void printLevelRange(mlir::AsmPrinter &, mlir::sparse_tensor::Level,
+                            mlir::sparse_tensor::Level);
+
 #define GET_TYPEDEF_CLASSES
 #include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.cpp.inc"
 
@@ -1953,6 +1961,108 @@ LogicalResult SortOp::verify() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Iteration Operations.
+//===----------------------------------------------------------------------===//
+
+IterSpaceType IteratorType::getIterSpaceType() const {
+  return IterSpaceType::get(getContext(), getEncoding(), getLoLvl(),
+                            getHiLvl());
+}
+
+IteratorType IterSpaceType::getIteratorType() const {
+  return IteratorType::get(getContext(), getEncoding(), getLoLvl(), getHiLvl());
+}
+
+/// Parses a level range "$lo `to` $hi", or just "$lo" (then $hi = $lo + 1).
+/// Fails with a diagnostic if the range is empty or reversed ($hi <= $lo).
+static ParseResult parseLevelRange(AsmParser &parser, Level &lvlLo,
+                                   Level &lvlHi) {
+  if (parser.parseInteger(lvlLo))
+    return failure();
+
+  if (succeeded(parser.parseOptionalKeyword("to"))) {
+    if (parser.parseInteger(lvlHi))
+      return failure();
+  } else {
+    lvlHi = lvlLo + 1;
+  }
+
+  if (lvlHi <= lvlLo)
+    return parser.emitError(parser.getNameLoc(),
+                            "expect larger level upper bound than lower bound");
+
+  return success();
+}
+
+/// Parses a level range in the form "$lo `to` $hi"
+/// or simply "$lo" if $hi - $lo = 1
+static ParseResult parseLevelRange(OpAsmParser &parser, IntegerAttr &lvlLoAttr,
+                                   IntegerAttr &lvlHiAttr) {
+  Level lvlLo, lvlHi;
+  if (parseLevelRange(parser, lvlLo, lvlHi))
+    return failure();
+
+  lvlLoAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlLo);
+  lvlHiAttr = IntegerAttr::get(parser.getBuilder().getIndexType(), lvlHi);
+  return success();
+}
+
+/// Prints a level range in the form "$lo `to` $hi"
+/// or simply "$lo" if $hi - $lo = 1
+static void printLevelRange(AsmPrinter &p, Level lo, Level hi) {
+
+  if (lo + 1 == hi)
+    p << lo;
+  else
+    p << lo << " to " << hi;
+}
+
+/// Prints a level range in the form "$lo `to` $hi"
+/// or simply "$lo" if $hi - $lo = 1
+static void printLevelRange(OpAsmPrinter &p, Operation *, IntegerAttr lvlLo,
+                            IntegerAttr lvlHi) {
+  unsigned lo = lvlLo.getValue().getZExtValue();
+  unsigned hi = lvlHi.getValue().getZExtValue();
+  printLevelRange(p, lo, hi);
+}
+
+LogicalResult ExtractIterSpaceOp::inferReturnTypes(
+    MLIRContext *ctx, std::optional<Location> loc, ValueRange ops,
+    DictionaryAttr attr, OpaqueProperties prop, RegionRange region,
+    SmallVectorImpl<mlir::Type> &ret) {
+
+  ExtractIterSpaceOp::Adaptor adaptor(ops, attr, prop, region);
+  SparseTensorType stt = getSparseTensorType(adaptor.getTensor());
+  ret.push_back(IterSpaceType::get(ctx, stt.getEncoding(), adaptor.getLoLvl(),
+                                   adaptor.getHiLvl()));
+  return success();
+}
+
+LogicalResult ExtractIterSpaceOp::verify() {
+  if (getLoLvl() >= getHiLvl())
+    return emitOpError("expected smaller level low than level high");
+
+  TypedValue<IteratorType> pIter = getParentIter();
+  if ((pIter && getLoLvl() == 0) || (!pIter && getLoLvl() != 0)) {
+    return emitOpError(
+        "parent iterator should be specified iff level lower bound equals 0");
+  }
+
+  if (pIter) {
+    IterSpaceType spaceTp = getResultSpace().getType();
+    if (pIter.getType().getEncoding() != spaceTp.getEncoding())
+      return emitOpError(
+          "mismatch in parent iterator encoding and iteration space encoding.");
+
+    if (spaceTp.getLoLvl() != pIter.getType().getHiLvl())
+      return emitOpError("parent iterator should be used to extract an "
+                         "iteration space from a consecutive level.");
+  }
+
+  return success();
+}
+
 /// Materialize a single constant operation from a given attribute value with
 /// the desired resultant type.
 Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder,
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index 7f5c05190fc9a..3fa696e1600a9 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -1012,3 +1012,85 @@ func.func @sparse_print(%arg0: tensor<10x10xf64>) {
   sparse_tensor.print %arg0 : tensor<10x10xf64>
   return
 }
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 2>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' expect larger level upper bound than lower bound}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 to 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 2>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 0 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be specified iff level lower bound equals 0}}
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 1 : tensor<4x8xf32, #COO>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+#CSR = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : dense,
+    j : compressed
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#CSR, lvls = 0>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op mismatch in parent iterator encoding and iteration space encoding.}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#CSR, lvls = 0>
+  return
+}
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>) {
+  // expected-error@+1 {{'sparse_tensor.extract_iteration_space' op parent iterator should be used to extract an iteration space from a consecutive level.}}
+  %l1 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 2 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+  return
+}
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index 12f69c1d37b9c..d34071279e512 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -738,3 +738,28 @@ func.func @sparse_has_runtime() -> i1 {
   %has_runtime = sparse_tensor.has_runtime_library
   return %has_runtime : i1
 }
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i : compressed(nonunique),
+    j : singleton(soa)
+  )
+}>
+
+// CHECK-LABEL:   func.func @sparse_extract_iter_space(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<4x8xf32, #sparse{{[0-9]*}}>,
+// CHECK-SAME:      %[[VAL_1:.*]]: !sparse_tensor.iterator<#sparse{{[0-9]*}}, lvls = 0>)
+// CHECK:           %[[VAL_2:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] lvls = 0
+// CHECK:           %[[VAL_3:.*]] = sparse_tensor.extract_iteration_space %[[VAL_0]] at %[[VAL_1]] lvls = 1
+// CHECK:           return %[[VAL_2]], %[[VAL_3]] : !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 0>, !sparse_tensor.iter_space<#sparse{{[0-9]*}}, lvls = 1>
+// CHECK:         }
+func.func @sparse_extract_iter_space(%sp : tensor<4x8xf32, #COO>, %it1 : !sparse_tensor.iterator<#COO, lvls = 0>)
+  -> (!sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>) {
+  // Extracting the iteration space for the first level needs no parent iterator.
+  %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 : tensor<4x8xf32, #COO>
+  // Extracting the iteration space for the second level needs a parent iterator.
+  %l2 = sparse_tensor.extract_iteration_space %sp at %it1 lvls = 1 : tensor<4x8xf32, #COO>, !sparse_tensor.iterator<#COO, lvls = 0>
+  return %l1, %l2 : !sparse_tensor.iter_space<#COO, lvls = 0>, !sparse_tensor.iter_space<#COO, lvls = 1>
+}

From edb711d2f318b17489692b5f85028fab7ed85b83 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Tue, 16 Apr 2024 13:35:23 -0500
Subject: [PATCH 41/58] [InstCombine] Update `vector_reduce_and` tests to
 actually use `llvm.vector.reduce.and`; NFC

---
 llvm/test/Transforms/InstCombine/known-bits.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index 85a21332b0788..8b4249b2c25a9 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -1223,7 +1223,7 @@ define i8 @known_reduce_and(<2 x i8> %xx) {
 ; CHECK-NEXT:    ret i8 1
 ;
   %x = or <2 x i8> %xx, <i8 5, i8 3>
-  %v = call i8 @llvm.vector.reduce.or(<2 x i8> %x)
+  %v = call i8 @llvm.vector.reduce.and(<2 x i8> %x)
   %r = and i8 %v, 1
   ret i8 %r
 }
@@ -1231,12 +1231,12 @@ define i8 @known_reduce_and(<2 x i8> %xx) {
 define i8 @known_reduce_and_fail(<2 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_and_fail(
 ; CHECK-NEXT:    [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 5, i8 3>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> [[X]])
 ; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 2
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %x = or <2 x i8> %xx, <i8 5, i8 3>
-  %v = call i8 @llvm.vector.reduce.or(<2 x i8> %x)
+  %v = call i8 @llvm.vector.reduce.and(<2 x i8> %x)
   %r = and i8 %v, 2
   ret i8 %r
 }

From 5c6af605b307213453a9a043532b9293db21b5c6 Mon Sep 17 00:00:00 2001
From: mahtohappy <Happy.Kumar@Windriver.com>
Date: Wed, 17 Apr 2024 00:12:14 +0530
Subject: [PATCH 42/58] [Clang][Sema] placement new initializes typedef array
 with correct size (#88902)

Build Failure Fix
Fixes build failures due to #83124
---
 .../{instantiate-new-placement-size.cpp => PR41441.cpp}       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 rename clang/test/SemaCXX/{instantiate-new-placement-size.cpp => PR41441.cpp} (75%)

diff --git a/clang/test/SemaCXX/instantiate-new-placement-size.cpp b/clang/test/SemaCXX/PR41441.cpp
similarity index 75%
rename from clang/test/SemaCXX/instantiate-new-placement-size.cpp
rename to clang/test/SemaCXX/PR41441.cpp
index 7a29d3dee8491..0b012b33fce34 100644
--- a/clang/test/SemaCXX/instantiate-new-placement-size.cpp
+++ b/clang/test/SemaCXX/PR41441.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang -S -fno-discard-value-names -emit-llvm -o - %s | FileCheck %s
-// Issue no: 41441
+// RUN: %clang --target=x86_64-pc-linux -S -fno-discard-value-names -emit-llvm -o - %s | FileCheck %s
+
 #include <new>
 
 // CHECK: call void @llvm.memset.p0.i64(ptr align 1 %x, i8 0, i64 8, i1 false)

From b01879ec1ffbd249f9bf3c4f32308443be6ac36b Mon Sep 17 00:00:00 2001
From: Chao Chen <116223022+chencha3@users.noreply.github.com>
Date: Tue, 16 Apr 2024 13:44:14 -0500
Subject: [PATCH 43/58] [MLIR][XeGPU] Add XeGPU scattered ops (#86594)

- Extended TensorDescAttr with scattered attribute
- Add scattered ops: CreateDescOp, PrefetchOp, LoadGatherOp,
StoreScatterOp, UpdateOffsetOp
- Add a block op: UpdateNdOffsetOp

---------

Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
Co-authored-by: Adam Siemieniuk <adam.siemieniuk@intel.com>
---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h    |   1 +
 .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td       |  42 +-
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 464 +++++++++++++++---
 .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td       |  41 +-
 mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp    |  21 +
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 254 +++++++++-
 mlir/test/Dialect/XeGPU/XeGPUOps.mlir         |  62 +++
 mlir/test/Dialect/XeGPU/invalid.mlir          | 159 ++++++
 8 files changed, 937 insertions(+), 107 deletions(-)
 create mode 100644 mlir/test/Dialect/XeGPU/invalid.mlir

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 87aabdc015fea..eca9255ff3974 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -12,6 +12,7 @@
 #include "mlir/Bytecode/BytecodeOpInterface.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
+#include "mlir/IR/TypeUtilities.h"
 #include "mlir/Interfaces/ShapedOpInterfaces.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index cd38549f1ccf4..6579d07ec2621 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -19,17 +19,36 @@ class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
 }
 
 def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
+  let summary = [{a composite attribute for `TensorDescType`}];
+  let description = [{`TensorDescAttr` (or `tdesc_attr`) is a composite
+    attribute defined for `TensorDescType` for describing following
+    properties of a `TensorDesc`.
+    1. `memory_scope`: It describes where the data block described by the
+        TensorDesc is located, `Global` device memory or `Shared` local memory.
+        It defaults to `Global`.
+    2. `array_length`: It describes how many horizontally consecutive blocks
+        will be loaded by a hardware load instruction. If the TensorDesc shape
+        is 8x16, with array_length = 2. The loaded block shape will actually be
+        8x32. Its default value is 1.
+    3. `boundary_check`: It is used to indicate to the hardware whether to do
+        out-of-boundary check. The default value is true.
+    4. `scattered`: It is used to differentiate TensorDescs created from
+       `create_nd_tdesc` vs from `create_tdesc`.
+  }];
+
   let parameters = (ins
     OptionalParameter<"MemoryScopeAttr">: $memory_scope,
     OptionalParameter<"IntegerAttr", "1">: $array_length,
-    OptionalParameter<"BoolAttr", "true">: $boundary_check
+    OptionalParameter<"BoolAttr", "true">: $boundary_check,
+    OptionalParameter<"BoolAttr", "false">: $scattered
   );
 
   let builders = [
     AttrBuilder<(ins
       CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
       CArg<"int", "1">:$array_length,
-      CArg<"bool", "true">: $boundary_check
+      CArg<"bool", "true">: $boundary_check,
+      CArg<"bool", "false">: $scattered
     )>
   ];
 
@@ -41,15 +60,17 @@ def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
 //===----------------------------------------------------------------------===//
 def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
 def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">;
-def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", 
-      "The address space of the memory the tensor descritor is created for", 
+def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope",
+      "The address space of the memory the tensor descriptor is created for",
       [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> {
   let genSpecializedAttr = 0;
   let cppNamespace = "::mlir::xegpu";
 }
 
-def XeGPU_MemoryScopeAttr: 
+def XeGPU_MemoryScopeAttr:
   EnumAttr<XeGPU_Dialect, XeGPU_MemoryScope, "memory_scope"> {
+    let summary = [{Describe the location of data described by a `TensorDesc`:
+                 Global device memory (`Global`) or Shared local memory (`SLM`).}];
     let assemblyFormat = "$value";
 }
 
@@ -63,19 +84,18 @@ def XeGPU_CachePolicyInvalid:       I32EnumAttrCase<"READ_INVALIDATE", 3, "read_
 def XeGPU_CachePolicyWriteBack:     I32EnumAttrCase<"WRITE_BACK", 4, "write_back">;            // valid for write only
 def XeGPU_CachePolicyWriteThrough:  I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">;      // valid for write only
 
-def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy", 
-  [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached, 
+def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy",
+  [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached,
    XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid,
    XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> {
   let genSpecializedAttr = 0;
   let cppNamespace = "::mlir::xegpu";
 }
 
-def XeGPU_CacheHintAttr 
+def XeGPU_CacheHintAttr
   : EnumAttr<XeGPU_Dialect, XeGPU_CachePolicyEnums, "cache_hint"> {
+    let summary = [{Describe the cache settings for prefetch/load/store operators}];
     let assemblyFormat = "`<` $value `>`";
 }
 
-
-
-#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
\ No newline at end of file
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index b8ebd1a40c607..c6f7f83441b96 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -47,36 +47,35 @@ class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
 }
 
 
-def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, 
+def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface,
                         AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
 
   let summary = "Create nd-tensor descriptor operation";
   let description = [{
     The "create_nd_tdesc" operation creates a TensorDescType which represents
     a sub-view of a 2D memory region (It can be extended to support n-D memory
-    region if needed in future). Elements in the subview continuous in each 
-    dimention. It encodes the following important information for supporting 
+    region if needed in future). Elements in the subview are continuous in each
+    dimension. It encodes the following important information for supporting
     Intel hardware features:
 
-    * source: an object representing (starting address/pointer of) a 2D memory region. 
+    * source: an object representing (starting address/pointer of) a 2D memory region.
         It can be either a 2D memref object, or simply a pointer represented by uint64_t type.
-        for the later case, the shape and layout information of the 2D memory region should 
-        be explicitly passed via `dynamic_shape` and `dynamic_strides` parameters.
-    * offsets: two index values represents offsets from the "source" at the each dimension 
+        for the latter case, the shape and layout information of the 2D memory region should
+        be explicitly passed via `shape` and `strides` parameters.
+    * offsets: two index values representing offsets from the "source" at each dimension
         at which the subview of the target memory will be created. It is encoded via two
-        variables, including "dynamic_offsets" and "static_offsets", such that it can
-        accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])).
-    * shape: the shape information of the memory region pointed by the "source".  It is 
-        typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. 
-        But if "source" is simply a pointer represented as uint64_t type, or a memref 
-        type without shape information e.g., memref<?x?xf16>, the shape information has 
-        to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape" 
-        only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]).
-    * strides: the strides of the memory region pointed by the "source". Similar to shape, 
-        it is typically encoded via the MemRefType of the source too. But if "source" is 
-        simply a pointer represented as uint64_t type, or a memref type without shape 
-        information e.g., memref<?x?xf16>, the strides information has to be explicitly 
-        passed via the "dynamic_strides" argument. And it currently only accepts operands two.
+        variables, including "offsets" and "const_offsets", such that it can
+        accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4]).
+    * shape: the shape information of the memory region pointed by the "source".  It is
+        typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>.
+        But if "source" is simply a pointer represented as uint64_t type, or a memref
+        type without shape information e.g., memref<?x?xf16>, the shape information has
+        to be explicitly passed via the "shape" and "const_shape" arguments.
+    * strides: the strides of the memory region pointed by the "source". Similar to shape,
+        it is typically encoded via the MemRefType of the source too. But if "source" is
+        simply a pointer represented as uint64_t type, or a memref type without shape
+        information e.g., memref<?x?xf16>, the strides information has to be explicitly
+        passed via the "strides" and "const_strides" arguments.
 
     Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
     %0 = memref.alloc() : memref<1024x1024xf32>
@@ -97,10 +96,10 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32>
   }];
 
-  let arguments = (ins 
-    XeGPU_BaseAddrType: $source, 
-    Variadic<Index>: $offsets, 
-    Variadic<Index>: $shape, 
+  let arguments = (ins
+    XeGPU_BaseAddrType: $source,
+    Variadic<Index>: $offsets,
+    Variadic<Index>: $shape,
     Variadic<Index>: $strides,
     DenseI64ArrayAttr: $const_offsets,
     OptionalAttr<DenseI64ArrayAttr>: $const_shape,
@@ -119,12 +118,12 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
   let hasVerifier = 1;
 
   let builders = [
-    OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source, 
+    OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source,
                    "llvm::ArrayRef<OpFoldResult>": $offsets)>,
 
-    OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source, 
+    OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source,
                    "llvm::ArrayRef<OpFoldResult>": $offsets,
-                   "llvm::ArrayRef<OpFoldResult>": $shape, 
+                   "llvm::ArrayRef<OpFoldResult>": $shape,
                    "llvm::ArrayRef<OpFoldResult>": $strides)>
   ];
 
@@ -159,41 +158,41 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     }
 
     /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    /// If source is IntegerType or `const_shape` is filled, 
+    /// If source is IntegerType or `const_shape` is filled,
     /// it will return `const_shape`, such that mixes of `shape`
-    /// and `const_shape` will be used to represent the shape of 
+    /// and `const_shape` will be used to represent the shape of
     /// source operand. They overide static shape from source memref type.
     ArrayRef<int64_t> getStaticSizes() {
       auto attr = getConstShapeAttr();
       if (getSourceType().isa<IntegerType>() || attr)
         return attr;
-      
+
       auto memrefType = getSourceType().dyn_cast<MemRefType>();
       assert(memrefType && "Incorrect use of getStaticSizes");
       return memrefType.getShape();
     }
 
     /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    /// If source is IntegerType or `const_strides` is filled, it 
+    /// If source is IntegerType or `const_strides` is filled, it
     /// will return `const_strides`, such that mixes of `strides`
-    /// and `const_strides` will be used to represent the strides of 
+    /// and `const_strides` will be used to represent the strides of
     /// source operand. They overide static strides from source memref type.
     ArrayRef<int64_t> getStaticStrides() {
       auto attr = getConstStridesAttr();
       if (getSourceType().isa<IntegerType>() || attr)
         return attr;
-      
+
       auto memrefType = getSourceType().dyn_cast<MemRefType>();
       assert(memrefType && "Incorrect use of getStaticStrides");
       auto [strides, offset] = getStridesAndOffset(memrefType);
-      // reuse the storage of ConstStridesAttr since strides from 
+      // reuse the storage of ConstStridesAttr since strides from
       // memref is not persistant
       setConstStrides(strides);
       attr = getConstStridesAttr();
       return attr;
     }
 
-    /// Return the expected rank of each of the`static_offsets`, 
+    /// Return the expected rank of each of the `static_offsets`,
     /// `static_shape` and `static_strides` attributes.
     std::array<unsigned, 3> getArrayAttrMaxRanks() {
       unsigned rank;
@@ -204,8 +203,8 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
       }
       return {rank, rank, rank};
     }
-    
-    /// Return the number of leading operands before the `offsets`, 
+
+    /// Return the number of leading operands before the `offsets`,
     /// `shape` and `strides` operands.
     static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
 
@@ -214,15 +213,15 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
 }
 
 def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
-  let summary = "prefetches a nD block to cache";
+  let summary = "prefetches a n-D block to cache";
   let description = [{
-    It issues an instruction to prefetch the data from memory to each 
-    level of the cache based on their cache policy.
+    It issues an instruction to prefetch a block of data from continuous
+    memory regions to each level of the cache based on their cache policy.
 
     Example:
     ```
-      xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint<cached>, 
-                                l2_hint = #xegpu.cache_hint<cached>, 
+      xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint<cached>,
+                                l2_hint = #xegpu.cache_hint<cached>,
                                 l3_hint = #xegpu.cache_hint<cached>}
         : !xegpu.tensor_desc<8x16xf16>
     ```
@@ -233,34 +232,41 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
                        OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-                       
-  let extraClassDeclaration = extraBaseClassDeclaration;
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+  }];
 
   let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))";
+
+  let hasVerifier = 1;
 }
 
 
-def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> {
-  let summary = "loads a n-D block from memory (represented by TensorDesc)" 
+def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [AllElementTypesMatch<["value", "TensorDesc"]>,
+                                         AllElementCountsMatch<["value", "TensorDesc"]>]> {
+  let summary = "loads a n-D block from memory (represented by TensorDesc)"
                 "to registers (represented by vector)";
   let description = [{
-    LoadNdOp essentially mimics the hardware block read instruction to read 
-    a block of data from memory to register. It takes a set of optional cache 
-    hints for each level of cache, L1, L2 and L3. If hardware does not have a 
+    LoadNdOp essentially mimics the hardware block read instruction to read
+    a block of data from memory to register. It takes a set of optional cache
+    hints for each level of cache, L1, L2 and L3. If hardware does not have a
     correspoding cache, Corresponding cache hint attribute will be masked.
-    vnni transform is an hardware feature for Intel GPU, which is used to 
-    do data packing during the load for B operand of matrix operation, if 
-    the bit width of the data type is less then 32 bits, e.g., fp16. And 
+    vnni transform is a hardware feature for Intel GPU, which is used to
+    do data packing during the load for B operand of matrix operation, if
+    the bit width of the data type is less than 32 bits, e.g., fp16. And
     transpose is another Intel hardware feature, which will do transpose
-    operation when loading the data if the bit width of the data type is 
-    fp32 or fp64. It implies that vnni and transpose cannot exit at the 
+    operation when loading the data if the bit width of the data type is
+    fp32 or fp64. It implies that vnni and transpose cannot exist at the
     same time.
 
     Example:
     ```
       xegpu.load_nd %1 {transpose = [1, 0],
-                        l1_hint = #xegpu.cache_hint<cached>, 
-                        l2_hint = #xegpu.cache_hint<uncached>, 
+                        l1_hint = #xegpu.cache_hint<cached>,
+                        l2_hint = #xegpu.cache_hint<uncached>,
                         l3_hint = #xegpu.cache_hint<streaming>}
               : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
     ```
@@ -291,20 +297,21 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> {
   let hasVerifier = 1;
 }
 
-def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> {
+def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [AllShapesMatch<["value", "TensorDesc"]>,
+                                       AllElementTypesMatch<["value", "TensorDesc"]>]> {
   let summary = "stores a n-D block register region back to memory, currently only supports 2D";
 
   let description = [{
     StoreNdOp essentially mimics the hardware block write instruction io
-    write a block of data from register into the memory region as described 
-    by the TensorDesc. It takes a set of optional cache hints for each level 
-    of cache, L1, L2 and L3. If hardware does not have a correspoding cache, 
+    write a block of data from register into the memory region as described
+    by the TensorDesc. It takes a set of optional cache hints for each level
+    of cache, L1, L2 and L3. If hardware does not have a corresponding cache,
     Corresponding cache hint attribute will be masked.
 
     Example:
     ```
       xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
-                             l2_hint = #xegpu.cache_hint<write_back>, 
+                             l2_hint = #xegpu.cache_hint<write_back>,
                              l3_hint = #xegpu.cache_hint<write_through>}
                              : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
     ```
@@ -318,11 +325,342 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> {
                        OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
                        OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
 
-  let extraClassDeclaration = extraBaseClassDeclaration;
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    VectorType getValueType() {
+      return llvm::dyn_cast<VectorType>(getValue().getType());
+    }
 
-  let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict 
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+  }];
+
+  let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict
                         `:` type($value) `,` qualified(type($TensorDesc))}];
   let hasVerifier = 1;
 }
 
+def XeGPU_UpdateNdOffsetOp : XeGPU_Op<"update_nd_offset",
+                [AllTypesMatch<["TensorDesc", "result"]>]> {
+  let summary = "It updates the offsets for the TensorDesc.";
+  let description = [{The op updates the offset of the given TensorDesc.
+    The offsets are relative offsets to the current position in the number
+    of elements. It will result in a TensorDesc of the same type as the input.
+
+  example:
+  ```
+    %2 = xegpu.update_nd_offset %1, [0, 16]: !xegpu.tensor_desc<8x16xf32>
+  ```
+  }];
+
+  let arguments = (ins
+    XeGPU_TensorDesc: $TensorDesc,
+    Variadic<Index>: $offsets,
+    DenseI64ArrayAttr: $const_offsets);
+
+  let results = (outs XeGPU_TensorDesc: $result);
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+
+    SmallVector<OpFoldResult> getMixedOffsets() {
+      Builder b(getContext());
+      return getMixedValues(getConstOffsets(), getOffsets(), b);
+    }
+
+    size_t getNumOffsets() {
+      return getMixedOffsets().size();
+    }
+
+    OpFoldResult getOffset(unsigned idx) {
+      assert(idx < getNumOffsets() && "Invalid out of bound access.");
+      return getMixedOffsets()[idx];
+    }
+  }];
+
+  let assemblyFormat = [{
+    $TensorDesc `,`
+    custom<DynamicIndexList>($offsets, $const_offsets)
+    attr-dict `:` qualified(type($result))
+  }];
+
+  let hasVerifier = 1;
+}
+
+def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
+  let summary = "create scattered tensor descriptors (TensorDesc).";
+  let description = [{
+    "create_tdesc" is similar to "create_nd_tdesc" in terms that it creates
+    a Tensor Descriptor (TensorDescType) for a memory region. While "create_nd_tdesc"
+    is for creating continuous subviews, "create_tdesc" is for creating non-continuous
+    (scattered) subviews, allowing each work-item in a subgroup specifying their own offset.
+    It accepts the following parameters:
+
+    * source: a 1D memref or pointer (uint64_t) representing the flattened memory object.
+    * offsets: an array containing offsets of each access point. Its size
+      is fixed to the hardware supported subgroup size, e.g., 16 on PVC,
+      implying each element in the array corresponds to a work-item (SIMT lane)
+      in the subgroup.
+    * chunk_size: [optional attribute] indicates number of contiguous
+      elements accessed for each offset, default is 1.
+
+    Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64]
+    ```
+    %a = memref.alloc() : memref<1024xf32>
+    %1 = xegpu.create_tdesc %a[0, 16, 32, 64]: memref<1024xf32> -> TensorDesc<4xf32>
+    ```
+
+    Example 2. It assumes subgroup size is 4, and each workitem accesses 8 elements.
+               It accesses 32 data elements in total: a[0:7], a[16:23], a[32:39], a[64:71]
+    ```
+    %0 = memref.alloc() : memref<1024xf32>
+    %1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
+    ```
+
+    Example 3. It is similar to Example 2, but there are some overlaps among workitems.
+               It accesses: a[0:7], a[4:11], a[8:15], a[12:19]
+    ```
+    %0 = memref.alloc() : memref<1024xf32>
+    %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
+    ```
+
+
+
+
+  }];
+
+  let arguments = (ins XeGPU_BaseAddrType: $source,
+                       Variadic<Index>: $offsets,
+                       DenseI64ArrayAttr: $const_offsets,
+                       DefaultValuedAttr<I64Attr, "1">: $chunk_size);
+  let results = (outs XeGPU_TensorDesc:$TensorDesc);
+
+  let builders = [
+    OpBuilder<(ins "xegpu::TensorDescType": $TensorDesc, "Value": $source,
+                   "llvm::ArrayRef<OpFoldResult>": $offsets,
+                   CArg<"uint32_t", "1"> : $chunk_size)>,
+  ];
+
+  let assemblyFormat = [{
+    $source
+    custom<DynamicIndexList>($offsets, $const_offsets)
+    attr-dict `:`  type($source) `->` qualified(type($TensorDesc))
+  }];
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+
+    SmallVector<OpFoldResult> getMixedOffsets() {
+      Builder b(getContext());
+      return getMixedValues(getConstOffsets(), getOffsets(), b);
+    }
+
+    size_t getNumOffsets() {
+      return getMixedOffsets().size();
+    }
+
+    mlir::Value getViewSource() { return getSource(); }
+
+    OpFoldResult getOffset(unsigned idx) {
+      assert(idx < getNumOffsets() && "Invalid out of bound access.");
+      return getMixedOffsets()[idx];
+    }
+  }];
+
+  let hasVerifier = 1;
+}
+
+def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
+  let summary = "prefetches a set of scattered data points to cache";
+
+  let description = [{
+    It issues instructions to prefetch a set of scattered data points
+    from memory to each level of the cache based on their cache policy.
+    As compared to prefetch_nd, which works on non-scattered TensorDesc,
+    it works on scattered TensorDesc instead.
+
+    Example:
+    ```
+      xegpu.prefetch %tdesc {l1_hint = #xegpu.cache_hint<cached>,
+                             l2_hint = #xegpu.cache_hint<cached>,
+                             l3_hint = #xegpu.cache_hint<cached>}
+        : !xegpu.tensor_desc<16xf16>
+    ```
+
+  }];
+
+  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+  }];
+
+  let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))";
+
+  let hasVerifier = 1;
+}
+
+def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"]>,
+                                    AllElementTypesMatch<["value", "TensorDesc"]>,
+                                   AllElementCountsMatch<["value", "TensorDesc"]>]> {
+  let summary = "load a set of scattered data points from memory.";
+
+  let description = [{ It (aka. load) loads data for each work-item. The output
+    describes the data being loaded at the subgroup level, so its size is
+    consistent with the number of work-items in a subgroup. When `chunk_size_per_lane`
+    attribute is larger than 1 in TensorDesc, the output vector will be 2D vector,
+    with dim-1 corresponding to the chunk size.
+
+    The mask operand masks out memory access so that it is safe to pass out-of-boundary
+    addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
+
+  Example:
+  ```
+    %2 = xegpu.load %1, %0 {transpose = [1, 0],
+                            l1_hint = #xegpu.cache_hint<cached>,
+                            l2_hint = #xegpu.cache_hint<uncached>,
+                            l3_hint = #xegpu.cache_hint<uncached>}
+          : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr<scattered=true>>, vector<16xi1>
+            -> vector<16xf32>
+  ```
+
+  }];
+
+  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+                       XeGPU_MaskType: $mask,
+                       OptionalAttr<DenseI64ArrayAttr>: $transpose,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+  let results = (outs XeGPU_ValueType: $value);
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+
+    mlir::Type getElementType() {
+      auto type = getValue().getType();
+      return getElementTypeOrSelf(type);
+    }
+
+    Type getValueType() {
+      return getValue().getType();
+    }
+
+    Type getMaskType() {
+      return getMask().getType();
+    }
+
+  }];
+
+  let assemblyFormat = [{$TensorDesc `,` $mask prop-dict attr-dict
+      `:` qualified(type($TensorDesc)) `,` type($mask) `->` type($value)}];
+
+  let hasVerifier = 1;
+}
+
+def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllShapesMatch<["value", "TensorDesc"]>,
+                                        AllElementTypesMatch<["value", "TensorDesc"]>]> {
+  let summary = "store data to scattered memory locations.";
+  let description = [{ It (aka. store) stores data to scattered memory locations.
+  It has similar semantics to `load_gather`.
+
+  Example:
+  ```
+    xegpu.store %0, %1, %2 {l1_hint = #xegpu.cache_hint<uncached>,
+                            l2_hint = #xegpu.cache_hint<write_back>,
+                            l3_hint = #xegpu.cache_hint<write_through>}
+          : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr<scattered=true>>, vector<16xi1>
+  ```
+  }];
+
+  let arguments = (ins
+    XeGPU_ValueType: $value,
+    XeGPU_TensorDesc: $TensorDesc,
+    XeGPU_MaskType: $mask,
+    OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+    OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+    OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+
+    Type getValueType() {
+      return getValue().getType();
+    }
+
+    Type getMaskType() {
+      return getMask().getType();
+    }
+  }];
+
+  let assemblyFormat = [{$value `,` $TensorDesc `,` $mask prop-dict attr-dict
+            `:` type($value) `,` qualified(type($TensorDesc)) `,` type($mask)}];
+
+  let hasVerifier = 1;
+}
+
+def XeGPU_UpdateOffsetOp: XeGPU_Op<"update_offset",
+          [AllTypesMatch<["TensorDesc", "result"]>]> {
+  let summary = "It updates the offsets for the given tensor descriptor";
+
+  let description = [{It behaves similarly to `update_nd_offset` in terms that
+    it updates offset of a TensorDesc, and the offsets are relative offset to
+    the current position in the number of elements. However, `update_nd_offset`
+    is to update the start point of a 2D block, so its offsets contain two
+    elements representing the shift in each dimension. `update_offset` is to
+    update the offset per work-item, so its offsets contains values representing
+    shifts for each work-item.
+
+    Example:
+    ```
+      %2 = xegpu.update_offset %1, [32, 32, 32, 32]
+            : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+    ```
+  }];
+
+  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+                       Variadic<Index>: $offsets,
+                       DenseI64ArrayAttr: $const_offsets);
+  let results = (outs XeGPU_TensorDesc: $result);
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+
+    SmallVector<OpFoldResult> getMixedOffsets() {
+      Builder b(getContext());
+      return getMixedValues(getConstOffsets(), getOffsets(), b);
+    }
+
+    size_t getNumOffsets() {
+      return getMixedOffsets().size();
+    }
+
+    OpFoldResult getOffset(unsigned idx) {
+      assert(idx < getNumOffsets() && "Invalid out of bound access.");
+      return getMixedOffsets()[idx];
+    }
+  }];
+
+  let assemblyFormat = [{
+    $TensorDesc `,`
+    custom<DynamicIndexList>($offsets, $const_offsets)
+    attr-dict `:` qualified(type($TensorDesc))
+  }];
+}
+
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 19ac1693712dd..4cd4e5411653c 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -34,10 +34,10 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
         [ShapedTypeInterface], "::mlir::TensorType"> {
   let summary = "TensorDesc describing regions of interested data.";
   let description = [{
-    TensorDesc is a type designed to describe regions of the interested data as well as some 
-    features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, 
-    it essentially only contains the meta data, and doesn't hold the data by itself. It is designed 
-    to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. 
+    TensorDesc is a type designed to describe regions of the interested data as well as some
+    features that are unique to Intel hardware. Different from the builtin tensor type in MLIR,
+    it essentially only contains the meta data, and doesn't hold the data by itself. It is designed
+    to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU.
     It encodes the following information:
 
     * shape:  the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows
@@ -46,15 +46,15 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
               is set or not.
     * element_type: the data type of the data element, e.g., f16, f32.
 
-    Similar to the builtin tensor, it also provides an optinal attribute to encoding 
+    Similar to the builtin tensor, it also provides an optional attribute to encode
     the following information via the TensorDescAttr object:
-    * memory_scope (xegpu::MemoryScope): [optional] where the data is located, 
+    * memory_scope (xegpu::MemoryScope): [optional] where the data is located,
                 global memory or shared memory. It is default to Global.
     * array_length (int): [optional] The number of contiguous blocks with size as `shape`,
                that will be loaded by block load at a time. It is default to 1.
-    * boundary_check (bool): [optional] indicates whether the operation detects the boundary 
+    * boundary_check (bool): [optional] indicates whether the operation detects the boundary
                 and pads with zero for out-of-boundary access. It is default to do boundary check.
-    
+
 
     Syntax:
 
@@ -63,7 +63,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
     element-type ::= float-type | integer-type | index-type
     dim-list := (static-dim-list `x`)?
     static-dim-list ::= decimal-literal `x` decimal-literal
-    attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)?
+    attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)?
     ```
 
     Examples:
@@ -84,6 +84,17 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
                         "mlir::Type": $elementType,
                         OptionalParameter<"mlir::Attribute">: $encoding);
 
+  let builders = [
+    TypeBuilderWithInferredContext<(ins
+      "llvm::ArrayRef<int64_t>": $shape,
+      "mlir::Type": $elementType,
+      CArg<"bool", "false">: $scattered,
+      CArg<"int", "1">: $array_length,
+      CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
+      CArg<"bool", "true">: $boundary_check
+    )>
+  ];
+
   let extraClassDeclaration = [{
     using TensorType::clone;
     using mlir::ShapedType::Trait<TensorDescType>::getElementTypeBitWidth;
@@ -116,7 +127,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
       if (attr && attr.getArrayLength())
         return attr.getArrayLength().getInt();
       // return default value
-      return 1; 
+      return 1;
     }
 
     bool getBoundaryCheck() {
@@ -126,10 +137,18 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
       // return default value
       return true;
     }
+
+    bool getScattered() {
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getScattered())
+        return attr.getScattered().getValue();
+      // return default value
+      return false;
+    }
   }];
 
   let hasCustomAssemblyFormat = true;
-  
+
 }
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 0b3f4b9c9dbea..24719fe748fe4 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -32,6 +32,17 @@ void XeGPUDialect::initialize() {
 //===----------------------------------------------------------------------===//
 // XeGPU_TensorDescAttr
 //===----------------------------------------------------------------------===//
+TensorDescAttr TensorDescAttr::get(mlir::MLIRContext *context,
+                                   xegpu::MemoryScope memory_scope,
+                                   int array_length, bool boundary_check,
+                                   bool scattered) {
+  auto scopeAttr = MemoryScopeAttr::get(context, memory_scope);
+  auto lengthAttr =
+      IntegerAttr::get(IntegerType::get(context, 64), array_length);
+  auto boundaryAttr = BoolAttr::get(context, boundary_check);
+  auto scatteredAttr = BoolAttr::get(context, scattered);
+  return Base::get(context, scopeAttr, lengthAttr, boundaryAttr, scatteredAttr);
+}
 
 //===----------------------------------------------------------------------===//
 // XeGPU_TensorDescType
@@ -96,6 +107,16 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const {
   printer << ">";
 }
 
+TensorDescType TensorDescType::get(llvm::ArrayRef<int64_t> shape,
+                                   mlir::Type elementType, bool scattered,
+                                   int array_length, MemoryScope memory_scope,
+                                   bool boundary_check) {
+  auto context = elementType.getContext();
+  auto attr = TensorDescAttr::get(context, memory_scope, array_length,
+                                  boundary_check, scattered);
+  return Base::get(context, shape, elementType, attr);
+}
+
 } // namespace xegpu
 } // namespace mlir
 
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 02106f221f323..621986c54d492 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -9,6 +9,9 @@
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/TypeUtilities.h"
+
+#include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "xegpu"
 
@@ -16,8 +19,8 @@ namespace mlir {
 namespace xegpu {
 
 static void transpose(llvm::ArrayRef<int64_t> trans,
-                      std::vector<int64_t> &shape) {
-  std::vector<int64_t> old = shape;
+                      SmallVector<int64_t> &shape) {
+  SmallVector<int64_t> old = shape;
   for (size_t i = 0; i < trans.size(); i++)
     shape[i] = old[trans[i]];
 }
@@ -38,6 +41,38 @@ static std::string makeString(T array, bool breakline = false) {
   return buf;
 }
 
+static SmallVector<int64_t> getShapeOf(Type type) {
+  SmallVector<int64_t> shape;
+  if (auto ty = llvm::dyn_cast<ShapedType>(type))
+    shape = SmallVector<int64_t>(ty.getShape());
+  else
+    shape.push_back(1);
+  return shape;
+}
+
+static int64_t getRankOf(Value val) {
+  auto type = val.getType();
+  if (auto ty = llvm::dyn_cast<ShapedType>(type))
+    return ty.getRank();
+  return 0;
+};
+
+static bool isReadHintOrNone(const CachePolicyAttr &attr) {
+  if (!attr)
+    return true;
+  auto kind = attr.getValue();
+  return kind == CachePolicy::CACHED || kind == CachePolicy::UNCACHED ||
+         kind == CachePolicy::STREAMING || kind == CachePolicy::READ_INVALIDATE;
+}
+
+static bool isWriteHintOrNone(const CachePolicyAttr &attr) {
+  if (!attr)
+    return true;
+  auto kind = attr.getValue();
+  return kind == CachePolicy::CACHED || kind == CachePolicy::UNCACHED ||
+         kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
+}
+
 //===----------------------------------------------------------------------===//
 // XeGPU_CreateNdDescOp
 //===----------------------------------------------------------------------===//
@@ -114,6 +149,29 @@ LogicalResult CreateNdDescOp::verify() {
     return emitOpError("TensorDesc should have the same element "
                        "type with the source if it is a memref.\n");
 
+  if (getType().getScattered())
+    return emitOpError("Expects a non-scattered TensorDesc.\n");
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_PrefetchNdOp
+//===----------------------------------------------------------------------===//
+LogicalResult PrefetchNdOp::verify() {
+  auto tdescTy = getTensorDescType();
+  if (tdescTy.getScattered())
+    return emitOpError("Expects a non-scattered TensorDesc.\n");
+
+  if (!isReadHintOrNone(getL1HintAttr()))
+    return emitOpError("invlid l1_hint: ") << getL1HintAttr();
+
+  if (!isReadHintOrNone(getL2HintAttr()))
+    return emitOpError("invlid l2_hint: ") << getL2HintAttr();
+
+  if (!isReadHintOrNone(getL3HintAttr()))
+    return emitOpError("invlid l3_hint: ") << getL3HintAttr();
+
   return success();
 }
 
@@ -125,22 +183,26 @@ LogicalResult LoadNdOp::verify() {
   auto valueTy = getType();
 
   if (tdescTy.getRank() != 2)
-    return emitOpError(
-        "The TensorDesc for LoadNdOp should be a 2D TensorDesc.");
+    return emitOpError("Expecting a 2D TensorDesc.\n");
+
+  if (tdescTy.getScattered())
+    return emitOpError("Expects a non-scattered TensorDesc.\n");
 
   if (!valueTy)
     return emitOpError("Invalid result, it should be a VectorType.\n");
 
-  auto tdescElemTy = tdescTy.getElementType();
-  auto valueElemTy = valueTy.getElementType();
+  if (!isReadHintOrNone(getL1HintAttr()))
+    return emitOpError("invlid l1_hint: ") << getL1HintAttr();
 
-  if (tdescElemTy != valueElemTy)
-    return emitOpError(
-        "Value should have the same element type as TensorDesc.");
+  if (!isReadHintOrNone(getL2HintAttr()))
+    return emitOpError("invlid l2_hint: ") << getL2HintAttr();
+
+  if (!isReadHintOrNone(getL3HintAttr()))
+    return emitOpError("invlid l3_hint: ") << getL3HintAttr();
 
   auto array_len = tdescTy.getArrayLength();
-  auto tdescShape = tdescTy.getShape().vec();
-  auto valueShape = valueTy.getShape().vec();
+  auto tdescShape = getShapeOf(tdescTy);
+  auto valueShape = getShapeOf(valueTy);
 
   if (getTranspose()) {
     auto trans = getTranspose().value();
@@ -174,26 +236,174 @@ LogicalResult LoadNdOp::verify() {
 // XeGPU_StoreNdOp
 //===----------------------------------------------------------------------===//
 LogicalResult StoreNdOp::verify() {
-  auto dstTy = getTensorDesc().getType();               // Tile
-  auto valTy = getValue().getType().cast<VectorType>(); // Vector
+  auto dstTy = getTensorDescType(); // Tile
+  auto valTy = getValueType();      // Vector
 
   if (dstTy.getRank() != 2)
-    return emitOpError("Expecting a 2D TensorDesc shape.\n");
+    return emitOpError("Expecting a 2D TensorDesc.\n");
+
+  if (dstTy.getScattered())
+    return emitOpError("Expects a non-scattered TensorDesc.\n");
 
   if (!valTy)
     return emitOpError("Exepcting a VectorType result.\n");
 
-  auto dstElemTy = dstTy.getElementType();
-  auto valElemTy = valTy.getElementType();
+  if (!isWriteHintOrNone(getL1HintAttr()))
+    return emitOpError("invlid l1_hint: ") << getL1HintAttr();
+
+  if (!isWriteHintOrNone(getL2HintAttr()))
+    return emitOpError("invlid l2_hint: ") << getL2HintAttr();
+
+  if (!isWriteHintOrNone(getL3HintAttr()))
+    return emitOpError("invlid l3_hint: ") << getL3HintAttr();
+
+  return success();
+}
 
-  if (dstElemTy != valElemTy) {
-    return emitOpError() << "The element type of the value should "
-                            "match the elementtype of the TensorDesc.\n";
+//===----------------------------------------------------------------------===//
+// XeGPU_UpdateNdOffsetOp
+//===----------------------------------------------------------------------===//
+LogicalResult UpdateNdOffsetOp::verify() {
+  auto ty = getTensorDescType();
+  if (ty.getScattered())
+    return emitOpError("Expects a non-scattered TensorDesc.\n");
+
+  // number of offsets specified must match the rank of the tensor descriptor
+  if (ty.getRank() != (int64_t)getNumOffsets()) {
+    return emitOpError("Invalid number of offsets.");
   }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_CreateDescOp
+//===----------------------------------------------------------------------===//
+void CreateDescOp::build(OpBuilder &builder, OperationState &state,
+                         TensorDescType TensorDesc, Value source,
+                         llvm::ArrayRef<OpFoldResult> offsets,
+                         uint32_t chunk_size) {
+  llvm::SmallVector<int64_t> staticOffsets;
+  llvm::SmallVector<Value> dynamicOffsets;
+  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+  build(builder, state, TensorDesc, source, dynamicOffsets, staticOffsets,
+        chunk_size);
+}
+
+LogicalResult CreateDescOp::verify() {
+  auto tdescTy = getTensorDescType();
+  auto chunkSize = getChunkSize();
+
+  if (getRankOf(getSource()) > 1)
+    return emitOpError(
+        "Expecting the source is a 1D memref or pointer (uint64_t).");
+
+  if (!tdescTy.getScattered())
+    return emitOpError("Expects a scattered TensorDesc.\n");
+
+  SmallVector<int64_t> shape({(int64_t)getNumOffsets()});
+  if (chunkSize != 1)
+    shape.push_back(chunkSize);
+
+  auto tdescShape = getShapeOf(tdescTy);
+  if (shape != tdescShape)
+    return emitOpError("Incorrect TensorDesc shape. ")
+           << "Expected is " << makeString(shape) << "\n";
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_PrefetchOp
+//===----------------------------------------------------------------------===//
+LogicalResult PrefetchOp::verify() {
+  auto tdescTy = getTensorDescType();
+  if (!tdescTy.getScattered())
+    return emitOpError("Expects a scattered TensorDesc.\n");
+
+  if (!isReadHintOrNone(getL1HintAttr()))
+    return emitOpError("invlid l1_hint: ") << getL1HintAttr();
+
+  if (!isReadHintOrNone(getL2HintAttr()))
+    return emitOpError("invlid l2_hint: ") << getL2HintAttr();
+
+  if (!isReadHintOrNone(getL3HintAttr()))
+    return emitOpError("invlid l3_hint: ") << getL3HintAttr();
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_LoadGatherOp
+//===----------------------------------------------------------------------===//
+LogicalResult LoadGatherOp::verify() {
+  auto tdescTy = getTensorDescType();
+  auto maskTy = getMaskType();
+  auto valueTy = getValueType();
+
+  if (!tdescTy.getScattered())
+    return emitOpError("Expects a scattered TensorDesc.\n");
+
+  if (!isReadHintOrNone(getL1HintAttr()))
+    return emitOpError("invlid l1_hint: ") << getL1HintAttr();
+
+  if (!isReadHintOrNone(getL2HintAttr()))
+    return emitOpError("invlid l2_hint: ") << getL2HintAttr();
+
+  if (!isReadHintOrNone(getL3HintAttr()))
+    return emitOpError("invlid l3_hint: ") << getL3HintAttr();
+
+  auto tdescElemTy = tdescTy.getElementType();
+  auto valueElemTy = getElementType();
+  if (tdescElemTy != valueElemTy)
+    return emitOpError(
+        "Value should have the same element type as TensorDesc.");
+
+  auto maskShape = getShapeOf(maskTy);
+  auto valueShape = getShapeOf(valueTy);
+  auto tdescShape = getShapeOf(tdescTy);
+
+  if (tdescShape[0] != maskShape[0])
+    return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");
+
+  if (getTransposeAttr()) {
+    auto trans = getTranspose().value();
+    if (tdescShape.size() < trans.size())
+      emitWarning("Invalid transpose attr. It is ignored.");
+    else
+      transpose(trans, tdescShape);
+  }
+
+  if (valueShape != tdescShape)
+    return emitOpError("Unexpected result shape")
+           << "(Expected shape: " << makeString(tdescShape)
+           << ", Given shape: " << makeString(valueShape) << ").\n";
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_StoreScatterOp
+//===----------------------------------------------------------------------===//
+LogicalResult StoreScatterOp::verify() {
+  auto tdescTy = getTensorDescType();
+  if (!tdescTy.getScattered())
+    return emitOpError("Expects a scattered TensorDesc.\n");
+
+  if (!isWriteHintOrNone(getL1HintAttr()))
+    return emitOpError("invlid l1_hint: ") << getL1HintAttr();
+
+  if (!isWriteHintOrNone(getL2HintAttr()))
+    return emitOpError("invlid l2_hint: ") << getL2HintAttr();
+
+  if (!isWriteHintOrNone(getL3HintAttr()))
+    return emitOpError("invlid l3_hint: ") << getL3HintAttr();
+
+  auto maskTy = getMaskType();
+  auto maskShape = getShapeOf(maskTy);
+  auto tdescShape = getShapeOf(tdescTy);
+  if (tdescShape[0] != maskShape[0])
+    return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");
 
-  if (dstTy.getShape() != valTy.getShape())
-    return emitOpError()
-           << "The result shape should match the TensorDesc shape.\n";
   return success();
 }
 
diff --git a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
index 039346adbb851..f0945c79a94ac 100644
--- a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
+++ b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
@@ -59,4 +59,66 @@ gpu.func @test_store_nd_vc(%dst: memref<24x32xf16>) {
   gpu.return
 }
 
+// CHECK: gpu.func @test_create_update_nd_tdesc_vc(%[[arg0:.*]]: memref<24x32xf32>) {
+gpu.func @test_create_update_nd_tdesc_vc(%src: memref<24x32xf32>) {
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
+  // CHECK: %[[R1:.*]] = xegpu.update_nd_offset %[[REG]], [0, 16] : !xegpu.tensor_desc<8x16xf32>
+  %2 = xegpu.update_nd_offset %1, [0, 16]: !xegpu.tensor_desc<8x16xf32>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_tdesc_vc(%[[arg0:.*]]: ui64) {
+gpu.func @test_create_tdesc_vc(%src: ui64) {
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64  -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_prefetch_vc(%[[arg0:.*]]: ui64) {
+gpu.func @test_prefetch_vc(%src: ui64) {
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64  -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  // CHECK: xegpu.prefetch %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>> 
+  gpu.return
+}
+
+// CHECK: gpu.func @test_load_gather_vc(%[[arg0:.*]]: ui64) {
+gpu.func @test_load_gather_vc(%src: ui64) {
+  //CHECK: %[[cst:.*]] = arith.constant dense<true> : vector<4xi1>
+  %0 = arith.constant dense<1>: vector<4xi1>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64  -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+  //CHECK-SAME: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>, vector<4xi1> -> vector<4x2xf32>
+  %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+        : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>, vector<4xi1> -> vector<4x2xf32>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_store_scatter_vc(%[[arg0:.*]]: ui64) {
+gpu.func @test_store_scatter_vc(%src: ui64) {
+  //CHECK: %[[c0:.*]] = arith.constant dense<true> : vector<4xi1>
+  %0 = arith.constant dense<1>: vector<4xi1>
+  //CHECK: %[[c1:.*]] = arith.constant dense<2.900000e+00> : vector<4x2xf32>
+  %1 = arith.constant dense<2.9>: vector<4x2xf32>
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64  -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  //CHECK: xegpu.store %[[c1]], %[[R0]], %[[c0]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>
+  //CHECK-SAME: vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>, vector<4xi1>
+  xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>
+        : vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>, vector<4xi1>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_update_tdesc_vc(%[[arg0:.*]]: ui64) {
+gpu.func @test_create_update_tdesc_vc(%src: ui64) {
+  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64  -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  //CHECK: %[[R1:.*]] = xegpu.update_offset %[[R0]], [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  %2 = xegpu.update_offset %1, [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  gpu.return
+}
+
 }
\ No newline at end of file
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
new file mode 100644
index 0000000000000..5e29361ec6908
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -0,0 +1,159 @@
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics
+
+// -----
+func.func @test_create_nd_tdesc_vc_1(%src: memref<24xf32>) {
+  // expected-error@+1 {{Expecting the rank of shape, strides, offsets, source memref type (if source is a memref) and TensorDesc should match with each other. They currenlty are 2D.}}
+  %1 = xegpu.create_nd_tdesc %src[0] : memref<24xf32> -> !xegpu.tensor_desc<8x16xf32>
+  return
+}
+
+// -----
+
+func.func @test_create_nd_tdesc_vc_2(%src: memref<24x32xf32>) {
+  // expected-error@+1 {{TensorDesc should have the same element type with the source if it is a memref}}
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf16>
+  return
+}
+
+// -----
+func.func @test_prefetch_nd_vc_1(%src: memref<24x32xf16>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
+  // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint<write_back>}}
+  xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint<write_back>}>: !xegpu.tensor_desc<8x16xf16>
+  return
+}
+
+// -----
+func.func @test_prefetch_nd_vc_2(%src: memref<24xf16>) {
+  %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7]
+        : memref<24xf16> -> !xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr<scattered=true>>
+  // expected-error@+1 {{Expects a non-scattered TensorDesc}}
+  xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint<cached>}>
+        : !xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr<scattered=true>>
+  return
+}
+
+// -----
+func.func @test_load_nd_vc_1(%src: memref<8x16xf16>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
+  // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint<write_back>}}
+  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<write_back>}>
+      : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16>
+  return
+}
+
+// -----
+func.func @test_load_nd_vc_2(%src: memref<16xf16>) {
+  %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
+        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr<scattered=true>>
+  // expected-error@+1 {{Expects a non-scattered TensorDesc.}}
+  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>}>
+      : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr<scattered=true>> -> vector<8x2xf16>
+  return
+}
+
+// -----
+func.func @test_store_nd_vc_1(%dst: memref<24x32xf16>) {
+  %1 = arith.constant dense<1.0>: vector<24x32xf16>
+  %2 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
+  // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint<streaming>}}
+  xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint<streaming>}>: vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16>
+  return
+}
+
+// -----
+func.func @test_store_nd_vc_2(%dst: memref<16xf16>) {
+  %1 = arith.constant dense<1.0>: vector<8x2xf16>
+  %2 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
+        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr<scattered=true>>
+  // expected-error@+1 {{Expects a non-scattered TensorDesc}}
+  xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint<streaming>}>
+        : vector<8x2xf16>, !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr<scattered=true>>
+  return
+}
+
+// -----
+func.func @test_update_nd_offset_1(%dst: memref<16xf16>) {
+  %1 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
+        : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr<scattered=true>>
+  // expected-error@+1 {{Expects a non-scattered TensorDesc}}
+  xegpu.update_nd_offset %1, [0, 2] : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr<scattered=true>>
+  return
+}
+
+// -----
+func.func @test_create_tdesc_vc_1(%src: ui64) {
+  // expected-error@+1 {{Expects a scattered TensorDesc}}
+  %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
+        : ui64 -> !xegpu.tensor_desc<8x2xf16>
+  return
+}
+
+// -----
+func.func @test_create_tdesc_vc_2(%src: ui64) {
+  // expected-error@+1 {{Incorrect TensorDesc shape}}
+  %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2}
+        : ui64 -> !xegpu.tensor_desc<8x4xf16, #xegpu.tdesc_attr<scattered = true>>
+  return
+}
+
+// -----
+func.func @test_prefetch_vc_1(%src: memref<24x32xf16>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
+  // expected-error@+1 {{Expects a scattered TensorDesc}}
+  xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint<write_back>}>: !xegpu.tensor_desc<24x32xf16>
+  return
+}
+
+// -----
+func.func @test_prefetch_vc_2(%src: ui64) {
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64  -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint<write_back>}}
+  xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint<write_back>}>: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  return
+}
+
+// -----
+func.func @test_load_gather_vc_1(%src: memref<24x32xf16>) {
+  %0 = arith.constant dense<1>: vector<4xi1>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<4x2xf16>
+  // expected-error@+1 {{Expects a scattered TensorDesc}}
+  %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>}>
+      : !xegpu.tensor_desc<4x2xf16>, vector<4xi1> -> vector<4x2xf16>
+  return
+}
+
+// -----
+func.func @test_load_gather_vc_2(%src: ui64) {
+  %0 = arith.constant dense<1>: vector<4xi1>
+  %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64
+        -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint<write_back>}}
+  %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<write_back>}>
+        : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>, vector<4xi1>
+          -> vector<4x2xf32>
+  return
+}
+
+// -----
+func.func @test_store_scatter_vc_1(%src: memref<24x32xf32>) {
+  %0 = arith.constant dense<1>: vector<4xi1>
+  %1 = arith.constant dense<2.9>: vector<4x2xf32>
+  %2 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<4x2xf32>
+  // expected-error@+1 {{Expects a scattered TensorDesc}}
+  xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint<cached>}>
+        : vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32>, vector<4xi1>
+  return
+}
+
+// -----
+func.func @test_store_scatter_vc_2(%src: ui64) {
+  %0 = arith.constant dense<1>: vector<4xi1>
+  %1 = arith.constant dense<2.9>: vector<4x2xf32>
+  %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2}
+          : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>
+  // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint<streaming>}}
+  xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint<streaming>}> : vector<4x2xf32>,
+          !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr<scattered = true>>, vector<4xi1>
+  return
+}
\ No newline at end of file

From 9ec8c961664de3b3fcc1cbd5238e40ec8c9bdddb Mon Sep 17 00:00:00 2001
From: Samira Bazuzi <bazuzi@google.com>
Date: Tue, 16 Apr 2024 14:46:05 -0400
Subject: [PATCH 44/58] [clang][dataflow] Expose getReferencedDecls and
 relocate free functions. (#88754)

Moves free functions from DataflowEnvironment.h/cc and
DataflowAnalysisContext.h/cc to RecordOps and a new ASTOps and exposes
them as needed for current use and to expose getReferencedDecls for
out-of-tree use.

Minimal change in functionality, only to modify the return type of
getReferencedDecls to return the collected decls instead of using output
params.

Tested with `ninja check-clang-tooling`.
---
 clang/docs/tools/clang-formatted-files.txt    |   2 +
 .../clang/Analysis/FlowSensitive/ASTOps.h     |  98 +++++++
 .../FlowSensitive/DataflowAnalysisContext.h   |  28 +-
 .../FlowSensitive/DataflowEnvironment.h       |  36 ---
 clang/lib/Analysis/FlowSensitive/ASTOps.cpp   | 249 ++++++++++++++++++
 .../lib/Analysis/FlowSensitive/CMakeLists.txt |   1 +
 .../FlowSensitive/DataflowAnalysisContext.cpp |  53 +---
 .../FlowSensitive/DataflowEnvironment.cpp     | 177 +------------
 clang/lib/Analysis/FlowSensitive/Transfer.cpp |   2 +
 9 files changed, 359 insertions(+), 287 deletions(-)
 create mode 100644 clang/include/clang/Analysis/FlowSensitive/ASTOps.h
 create mode 100644 clang/lib/Analysis/FlowSensitive/ASTOps.cpp

diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt
index 3089438c23d94..2252d0ccde96d 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -123,6 +123,7 @@ clang/include/clang/Analysis/Analyses/CalledOnceCheck.h
 clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h
 clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
 clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h
+clang/include/clang/Analysis/FlowSensitive/ASTOps.h
 clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
 clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
 clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -307,6 +308,7 @@ clang/lib/Analysis/CalledOnceCheck.cpp
 clang/lib/Analysis/CloneDetection.cpp
 clang/lib/Analysis/CodeInjector.cpp
 clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp
+clang/lib/Analysis/FlowSensitive/ASTOps.cpp
 clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
 clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
 clang/lib/Analysis/FlowSensitive/DebugSupport.cpp
diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
new file mode 100644
index 0000000000000..27ad32c1694f7
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
@@ -0,0 +1,98 @@
+//===-- ASTOps.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  Operations on AST nodes that are used in flow-sensitive analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_ASTOPS_H
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_ASTOPS_H
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/StorageLocation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace clang {
+namespace dataflow {
+
+/// Skip past nodes that the CFG does not emit. These nodes are invisible to
+/// flow-sensitive analysis, and should be ignored as they will effectively not
+/// exist.
+///
+///   * `ParenExpr` - The CFG takes the operator precedence into account, but
+///   otherwise omits the node afterwards.
+///
+///   * `ExprWithCleanups` - The CFG will generate the appropriate calls to
+///   destructors and then omit the node.
+///
+const Expr &ignoreCFGOmittedNodes(const Expr &E);
+const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
+
+/// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic
+/// iteration order.
+using FieldSet = llvm::SmallSetVector<const FieldDecl *, 4>;
+
+/// Returns the set of all fields in the type.
+FieldSet getObjectFields(QualType Type);
+
+/// Returns whether `Fields` and `FieldLocs` contain the same fields.
+bool containsSameFields(const FieldSet &Fields,
+                        const RecordStorageLocation::FieldToLoc &FieldLocs);
+
+/// Helper class for initialization of a record with an `InitListExpr`.
+/// `InitListExpr::inits()` contains the initializers for both the base classes
+/// and the fields of the record; this helper class separates these out into two
+/// different lists. In addition, it deals with special cases associated with
+/// unions.
+class RecordInitListHelper {
+public:
+  // `InitList` must have record type.
+  RecordInitListHelper(const InitListExpr *InitList);
+
+  // Base classes with their associated initializer expressions.
+  ArrayRef<std::pair<const CXXBaseSpecifier *, Expr *>> base_inits() const {
+    return BaseInits;
+  }
+
+  // Fields with their associated initializer expressions.
+  ArrayRef<std::pair<const FieldDecl *, Expr *>> field_inits() const {
+    return FieldInits;
+  }
+
+private:
+  SmallVector<std::pair<const CXXBaseSpecifier *, Expr *>> BaseInits;
+  SmallVector<std::pair<const FieldDecl *, Expr *>> FieldInits;
+
+  // We potentially synthesize an `ImplicitValueInitExpr` for unions. It's a
+  // member variable because we store a pointer to it in `FieldInits`.
+  std::optional<ImplicitValueInitExpr> ImplicitValueInitForUnion;
+};
+
+/// A collection of several types of declarations, all referenced from the same
+/// function.
+struct ReferencedDecls {
+  /// Non-static member variables.
+  FieldSet Fields;
+  /// All variables with static storage duration, notably including static
+  /// member variables and static variables declared within a function.
+  llvm::DenseSet<const VarDecl *> Globals;
+  /// Free functions and member functions which are referenced (but not
+  /// necessarily called).
+  llvm::DenseSet<const FunctionDecl *> Functions;
+};
+
+/// Returns declarations that are declared in or referenced from `FD`.
+ReferencedDecls getReferencedDecls(const FunctionDecl &FD);
+
+} // namespace dataflow
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_ASTOPS_H
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index 909a91059438c..aa2c366cb164a 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -18,6 +18,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/TypeOrdering.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
 #include "clang/Analysis/FlowSensitive/Arena.h"
 #include "clang/Analysis/FlowSensitive/Solver.h"
@@ -30,38 +31,11 @@
 #include <cassert>
 #include <memory>
 #include <optional>
-#include <type_traits>
-#include <utility>
-#include <vector>
 
 namespace clang {
 namespace dataflow {
 class Logger;
 
-/// Skip past nodes that the CFG does not emit. These nodes are invisible to
-/// flow-sensitive analysis, and should be ignored as they will effectively not
-/// exist.
-///
-///   * `ParenExpr` - The CFG takes the operator precedence into account, but
-///   otherwise omits the node afterwards.
-///
-///   * `ExprWithCleanups` - The CFG will generate the appropriate calls to
-///   destructors and then omit the node.
-///
-const Expr &ignoreCFGOmittedNodes(const Expr &E);
-const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
-
-/// A set of `FieldDecl *`. Use `SmallSetVector` to guarantee deterministic
-/// iteration order.
-using FieldSet = llvm::SmallSetVector<const FieldDecl *, 4>;
-
-/// Returns the set of all fields in the type.
-FieldSet getObjectFields(QualType Type);
-
-/// Returns whether `Fields` and `FieldLocs` contain the same fields.
-bool containsSameFields(const FieldSet &Fields,
-                        const RecordStorageLocation::FieldToLoc &FieldLocs);
-
 struct ContextSensitiveOptions {
   /// The maximum depth to analyze. A value of zero is equivalent to disabling
   /// context-sensitive analysis entirely.
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 706664d7db1c2..4277792219c0a 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -775,42 +775,6 @@ RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
                                              const Environment &Env);
 
-/// Returns the fields of a `RecordDecl` that are initialized by an
-/// `InitListExpr`, in the order in which they appear in
-/// `InitListExpr::inits()`.
-/// `Init->getType()` must be a record type.
-std::vector<const FieldDecl *>
-getFieldsForInitListExpr(const InitListExpr *InitList);
-
-/// Helper class for initialization of a record with an `InitListExpr`.
-/// `InitListExpr::inits()` contains the initializers for both the base classes
-/// and the fields of the record; this helper class separates these out into two
-/// different lists. In addition, it deals with special cases associated with
-/// unions.
-class RecordInitListHelper {
-public:
-  // `InitList` must have record type.
-  RecordInitListHelper(const InitListExpr *InitList);
-
-  // Base classes with their associated initializer expressions.
-  ArrayRef<std::pair<const CXXBaseSpecifier *, Expr *>> base_inits() const {
-    return BaseInits;
-  }
-
-  // Fields with their associated initializer expressions.
-  ArrayRef<std::pair<const FieldDecl *, Expr *>> field_inits() const {
-    return FieldInits;
-  }
-
-private:
-  SmallVector<std::pair<const CXXBaseSpecifier *, Expr *>> BaseInits;
-  SmallVector<std::pair<const FieldDecl *, Expr *>> FieldInits;
-
-  // We potentially synthesize an `ImplicitValueInitExpr` for unions. It's a
-  // member variable because we store a pointer to it in `FieldInits`.
-  std::optional<ImplicitValueInitExpr> ImplicitValueInitForUnion;
-};
-
 /// Associates a new `RecordValue` with `Loc` and returns the new value.
 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env);
 
diff --git a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
new file mode 100644
index 0000000000000..75188aef4d1a4
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
@@ -0,0 +1,249 @@
+//===-- ASTOps.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  Operations on AST nodes that are used in flow-sensitive analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/AST/ComputeDependence.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/StorageLocation.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cassert>
+#include <iterator>
+#include <vector>
+
+#define DEBUG_TYPE "dataflow"
+
+namespace clang::dataflow {
+
+const Expr &ignoreCFGOmittedNodes(const Expr &E) {
+  const Expr *Current = &E;
+  if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
+    Current = EWC->getSubExpr();
+    assert(Current != nullptr);
+  }
+  Current = Current->IgnoreParens();
+  assert(Current != nullptr);
+  return *Current;
+}
+
+const Stmt &ignoreCFGOmittedNodes(const Stmt &S) {
+  if (auto *E = dyn_cast<Expr>(&S))
+    return ignoreCFGOmittedNodes(*E);
+  return S;
+}
+
+// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
+// field decl will be modeled for all instances of the inherited field.
+static void getFieldsFromClassHierarchy(QualType Type, FieldSet &Fields) {
+  if (Type->isIncompleteType() || Type->isDependentType() ||
+      !Type->isRecordType())
+    return;
+
+  for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
+    Fields.insert(Field);
+  if (auto *CXXRecord = Type->getAsCXXRecordDecl())
+    for (const CXXBaseSpecifier &Base : CXXRecord->bases())
+      getFieldsFromClassHierarchy(Base.getType(), Fields);
+}
+
+/// Gets the set of all fields in the type.
+FieldSet getObjectFields(QualType Type) {
+  FieldSet Fields;
+  getFieldsFromClassHierarchy(Type, Fields);
+  return Fields;
+}
+
+bool containsSameFields(const FieldSet &Fields,
+                        const RecordStorageLocation::FieldToLoc &FieldLocs) {
+  if (Fields.size() != FieldLocs.size())
+    return false;
+  for ([[maybe_unused]] auto [Field, Loc] : FieldLocs)
+    if (!Fields.contains(cast_or_null<FieldDecl>(Field)))
+      return false;
+  return true;
+}
+
+/// Returns the fields of a `RecordDecl` that are initialized by an
+/// `InitListExpr`, in the order in which they appear in
+/// `InitListExpr::inits()`.
+/// `InitList->getType()` must be a record type.
+static std::vector<const FieldDecl *>
+getFieldsForInitListExpr(const InitListExpr *InitList) {
+  const RecordDecl *RD = InitList->getType()->getAsRecordDecl();
+  assert(RD != nullptr);
+
+  std::vector<const FieldDecl *> Fields;
+
+  if (InitList->getType()->isUnionType()) {
+    Fields.push_back(InitList->getInitializedFieldInUnion());
+    return Fields;
+  }
+
+  // Unnamed bitfields are only used for padding and do not appear in
+  // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
+  // field list, and we thus need to remove them before mapping inits to
+  // fields to avoid mapping inits to the wrong fields.
+  llvm::copy_if(
+      RD->fields(), std::back_inserter(Fields),
+      [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
+  return Fields;
+}
+
+RecordInitListHelper::RecordInitListHelper(const InitListExpr *InitList) {
+  auto *RD = InitList->getType()->getAsCXXRecordDecl();
+  assert(RD != nullptr);
+
+  std::vector<const FieldDecl *> Fields = getFieldsForInitListExpr(InitList);
+  ArrayRef<Expr *> Inits = InitList->inits();
+
+  // Unions initialized with an empty initializer list need special treatment.
+  // For structs/classes initialized with an empty initializer list, Clang
+  // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions,
+  // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves.
+  SmallVector<Expr *> InitsForUnion;
+  if (InitList->getType()->isUnionType() && Inits.empty()) {
+    assert(Fields.size() == 1);
+    ImplicitValueInitForUnion.emplace(Fields.front()->getType());
+    InitsForUnion.push_back(&*ImplicitValueInitForUnion);
+    Inits = InitsForUnion;
+  }
+
+  size_t InitIdx = 0;
+
+  assert(Fields.size() + RD->getNumBases() == Inits.size());
+  for (const CXXBaseSpecifier &Base : RD->bases()) {
+    assert(InitIdx < Inits.size());
+    Expr *Init = Inits[InitIdx++];
+    BaseInits.emplace_back(&Base, Init);
+  }
+
+  assert(Fields.size() == Inits.size() - InitIdx);
+  for (const FieldDecl *Field : Fields) {
+    assert(InitIdx < Inits.size());
+    Expr *Init = Inits[InitIdx++];
+    FieldInits.emplace_back(Field, Init);
+  }
+}
+
+static void insertIfGlobal(const Decl &D,
+                           llvm::DenseSet<const VarDecl *> &Globals) {
+  if (auto *V = dyn_cast<VarDecl>(&D))
+    if (V->hasGlobalStorage())
+      Globals.insert(V);
+}
+
+static void insertIfFunction(const Decl &D,
+                             llvm::DenseSet<const FunctionDecl *> &Funcs) {
+  if (auto *FD = dyn_cast<FunctionDecl>(&D))
+    Funcs.insert(FD);
+}
+
+static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) {
+  // Use getCalleeDecl instead of getMethodDecl in order to handle
+  // pointer-to-member calls.
+  const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(C.getCalleeDecl());
+  if (!MethodDecl)
+    return nullptr;
+  auto *Body = dyn_cast_or_null<CompoundStmt>(MethodDecl->getBody());
+  if (!Body || Body->size() != 1)
+    return nullptr;
+  if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin()))
+    if (auto *Return = RS->getRetValue())
+      return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts());
+  return nullptr;
+}
+
+static void getReferencedDecls(const Decl &D, ReferencedDecls &Referenced) {
+  insertIfGlobal(D, Referenced.Globals);
+  insertIfFunction(D, Referenced.Functions);
+  if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
+    for (const auto *B : Decomp->bindings())
+      if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
+        // FIXME: should we be using `ME->getFoundDecl()`?
+        if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
+          Referenced.Fields.insert(FD);
+}
+
+/// Traverses `S` and inserts into `Referenced` any declarations that are
+/// declared in or referenced from sub-statements.
+static void getReferencedDecls(const Stmt &S, ReferencedDecls &Referenced) {
+  for (auto *Child : S.children())
+    if (Child != nullptr)
+      getReferencedDecls(*Child, Referenced);
+  if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S))
+    getReferencedDecls(*DefaultArg->getExpr(), Referenced);
+  if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
+    getReferencedDecls(*DefaultInit->getExpr(), Referenced);
+
+  if (auto *DS = dyn_cast<DeclStmt>(&S)) {
+    if (DS->isSingleDecl())
+      getReferencedDecls(*DS->getSingleDecl(), Referenced);
+    else
+      for (auto *D : DS->getDeclGroup())
+        getReferencedDecls(*D, Referenced);
+  } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
+    insertIfGlobal(*E->getDecl(), Referenced.Globals);
+    insertIfFunction(*E->getDecl(), Referenced.Functions);
+  } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) {
+    // If this is a method that returns a member variable but does nothing else,
+    // model the field of the return value.
+    if (MemberExpr *E = getMemberForAccessor(*C))
+      if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl()))
+        Referenced.Fields.insert(FD);
+  } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
+    // FIXME: should we be using `E->getFoundDecl()`?
+    const ValueDecl *VD = E->getMemberDecl();
+    insertIfGlobal(*VD, Referenced.Globals);
+    insertIfFunction(*VD, Referenced.Functions);
+    if (const auto *FD = dyn_cast<FieldDecl>(VD))
+      Referenced.Fields.insert(FD);
+  } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
+    if (InitList->getType()->isRecordType())
+      for (const auto *FD : getFieldsForInitListExpr(InitList))
+        Referenced.Fields.insert(FD);
+  }
+}
+
+ReferencedDecls getReferencedDecls(const FunctionDecl &FD) {
+  ReferencedDecls Result;
+  // Look for global variable and field references in the
+  // constructor-initializers.
+  if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(&FD)) {
+    for (const auto *Init : CtorDecl->inits()) {
+      if (Init->isMemberInitializer()) {
+        Result.Fields.insert(Init->getMember());
+      } else if (Init->isIndirectMemberInitializer()) {
+        for (const auto *I : Init->getIndirectMember()->chain())
+          Result.Fields.insert(cast<FieldDecl>(I));
+      }
+      const Expr *E = Init->getInit();
+      assert(E != nullptr);
+      getReferencedDecls(*E, Result);
+    }
+    // Add all fields mentioned in default member initializers.
+    for (const FieldDecl *F : CtorDecl->getParent()->fields())
+      if (const auto *I = F->getInClassInitializer())
+        getReferencedDecls(*I, Result);
+  }
+  getReferencedDecls(*FD.getBody(), Result);
+
+  return Result;
+}
+
+} // namespace clang::dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
index a3b5d9adc24bd..6631fe27f3d90 100644
--- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
+++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_clang_library(clangAnalysisFlowSensitive
   AdornedCFG.cpp
   Arena.cpp
+  ASTOps.cpp
   DataflowAnalysisContext.cpp
   DataflowEnvironment.cpp
   Formula.cpp
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index d520539dd2535..e94fd39c45dc1 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -14,6 +14,7 @@
 
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
 #include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Logger.h"
@@ -359,55 +360,3 @@ DataflowAnalysisContext::~DataflowAnalysisContext() = default;
 
 } // namespace dataflow
 } // namespace clang
-
-using namespace clang;
-
-const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) {
-  const Expr *Current = &E;
-  if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
-    Current = EWC->getSubExpr();
-    assert(Current != nullptr);
-  }
-  Current = Current->IgnoreParens();
-  assert(Current != nullptr);
-  return *Current;
-}
-
-const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) {
-  if (auto *E = dyn_cast<Expr>(&S))
-    return ignoreCFGOmittedNodes(*E);
-  return S;
-}
-
-// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
-// field decl will be modeled for all instances of the inherited field.
-static void getFieldsFromClassHierarchy(QualType Type,
-                                        clang::dataflow::FieldSet &Fields) {
-  if (Type->isIncompleteType() || Type->isDependentType() ||
-      !Type->isRecordType())
-    return;
-
-  for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
-    Fields.insert(Field);
-  if (auto *CXXRecord = Type->getAsCXXRecordDecl())
-    for (const CXXBaseSpecifier &Base : CXXRecord->bases())
-      getFieldsFromClassHierarchy(Base.getType(), Fields);
-}
-
-/// Gets the set of all fields in the type.
-clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) {
-  FieldSet Fields;
-  getFieldsFromClassHierarchy(Type, Fields);
-  return Fields;
-}
-
-bool clang::dataflow::containsSameFields(
-    const clang::dataflow::FieldSet &Fields,
-    const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) {
-  if (Fields.size() != FieldLocs.size())
-    return false;
-  for ([[maybe_unused]] auto [Field, Loc] : FieldLocs)
-    if (!Fields.contains(cast_or_null<FieldDecl>(Field)))
-      return false;
-  return true;
-}
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index ee2581143e114..3bf3807268bee 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -17,6 +17,7 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
@@ -304,93 +305,6 @@ widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
   return WidenedMap;
 }
 
-/// Initializes a global storage value.
-static void insertIfGlobal(const Decl &D,
-                           llvm::DenseSet<const VarDecl *> &Vars) {
-  if (auto *V = dyn_cast<VarDecl>(&D))
-    if (V->hasGlobalStorage())
-      Vars.insert(V);
-}
-
-static void insertIfFunction(const Decl &D,
-                             llvm::DenseSet<const FunctionDecl *> &Funcs) {
-  if (auto *FD = dyn_cast<FunctionDecl>(&D))
-    Funcs.insert(FD);
-}
-
-static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) {
-  // Use getCalleeDecl instead of getMethodDecl in order to handle
-  // pointer-to-member calls.
-  const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(C.getCalleeDecl());
-  if (!MethodDecl)
-    return nullptr;
-  auto *Body = dyn_cast_or_null<CompoundStmt>(MethodDecl->getBody());
-  if (!Body || Body->size() != 1)
-    return nullptr;
-  if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin()))
-    if (auto *Return = RS->getRetValue())
-      return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts());
-  return nullptr;
-}
-
-static void
-getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
-                         llvm::DenseSet<const VarDecl *> &Vars,
-                         llvm::DenseSet<const FunctionDecl *> &Funcs) {
-  insertIfGlobal(D, Vars);
-  insertIfFunction(D, Funcs);
-  if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
-    for (const auto *B : Decomp->bindings())
-      if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
-        // FIXME: should we be using `E->getFoundDecl()`?
-        if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
-          Fields.insert(FD);
-}
-
-/// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
-/// global variables and functions that are declared in or referenced from
-/// sub-statements.
-static void
-getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
-                         llvm::DenseSet<const VarDecl *> &Vars,
-                         llvm::DenseSet<const FunctionDecl *> &Funcs) {
-  for (auto *Child : S.children())
-    if (Child != nullptr)
-      getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
-  if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S))
-    getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs);
-  if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
-    getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
-
-  if (auto *DS = dyn_cast<DeclStmt>(&S)) {
-    if (DS->isSingleDecl())
-      getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
-    else
-      for (auto *D : DS->getDeclGroup())
-        getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
-  } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
-    insertIfGlobal(*E->getDecl(), Vars);
-    insertIfFunction(*E->getDecl(), Funcs);
-  } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) {
-    // If this is a method that returns a member variable but does nothing else,
-    // model the field of the return value.
-    if (MemberExpr *E = getMemberForAccessor(*C))
-      if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl()))
-        Fields.insert(FD);
-  } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
-    // FIXME: should we be using `E->getFoundDecl()`?
-    const ValueDecl *VD = E->getMemberDecl();
-    insertIfGlobal(*VD, Vars);
-    insertIfFunction(*VD, Funcs);
-    if (const auto *FD = dyn_cast<FieldDecl>(VD))
-      Fields.insert(FD);
-  } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
-    if (InitList->getType()->isRecordType())
-      for (const auto *FD : getFieldsForInitListExpr(InitList))
-        Fields.insert(FD);
-  }
-}
-
 namespace {
 
 // Visitor that builds a map from record prvalues to result objects.
@@ -653,36 +567,13 @@ void Environment::initialize() {
 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
   assert(FuncDecl->doesThisDeclarationHaveABody());
 
-  FieldSet Fields;
-  llvm::DenseSet<const VarDecl *> Vars;
-  llvm::DenseSet<const FunctionDecl *> Funcs;
-
-  // Look for global variable and field references in the
-  // constructor-initializers.
-  if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
-    for (const auto *Init : CtorDecl->inits()) {
-      if (Init->isMemberInitializer()) {
-        Fields.insert(Init->getMember());
-      } else if (Init->isIndirectMemberInitializer()) {
-        for (const auto *I : Init->getIndirectMember()->chain())
-          Fields.insert(cast<FieldDecl>(I));
-      }
-      const Expr *E = Init->getInit();
-      assert(E != nullptr);
-      getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
-    }
-    // Add all fields mentioned in default member initializers.
-    for (const FieldDecl *F : CtorDecl->getParent()->fields())
-      if (const auto *I = F->getInClassInitializer())
-          getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
-  }
-  getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
+  ReferencedDecls Referenced = getReferencedDecls(*FuncDecl);
 
   // These have to be added before the lines that follow to ensure that
   // `create*` work correctly for structs.
-  DACtx->addModeledFields(Fields);
+  DACtx->addModeledFields(Referenced.Fields);
 
-  for (const VarDecl *D : Vars) {
+  for (const VarDecl *D : Referenced.Globals) {
     if (getStorageLocation(*D) != nullptr)
       continue;
 
@@ -694,7 +585,7 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
     setStorageLocation(*D, createObject(*D, nullptr));
   }
 
-  for (const FunctionDecl *FD : Funcs) {
+  for (const FunctionDecl *FD : Referenced.Functions) {
     if (getStorageLocation(*FD) != nullptr)
       continue;
     auto &Loc = createStorageLocation(*FD);
@@ -1354,64 +1245,6 @@ RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
   return Env.get<RecordStorageLocation>(*Base);
 }
 
-std::vector<const FieldDecl *>
-getFieldsForInitListExpr(const InitListExpr *InitList) {
-  const RecordDecl *RD = InitList->getType()->getAsRecordDecl();
-  assert(RD != nullptr);
-
-  std::vector<const FieldDecl *> Fields;
-
-  if (InitList->getType()->isUnionType()) {
-    Fields.push_back(InitList->getInitializedFieldInUnion());
-    return Fields;
-  }
-
-  // Unnamed bitfields are only used for padding and do not appear in
-  // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
-  // field list, and we thus need to remove them before mapping inits to
-  // fields to avoid mapping inits to the wrongs fields.
-  llvm::copy_if(
-      RD->fields(), std::back_inserter(Fields),
-      [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
-  return Fields;
-}
-
-RecordInitListHelper::RecordInitListHelper(const InitListExpr *InitList) {
-  auto *RD = InitList->getType()->getAsCXXRecordDecl();
-  assert(RD != nullptr);
-
-  std::vector<const FieldDecl *> Fields = getFieldsForInitListExpr(InitList);
-  ArrayRef<Expr *> Inits = InitList->inits();
-
-  // Unions initialized with an empty initializer list need special treatment.
-  // For structs/classes initialized with an empty initializer list, Clang
-  // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions,
-  // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves.
-  SmallVector<Expr *> InitsForUnion;
-  if (InitList->getType()->isUnionType() && Inits.empty()) {
-    assert(Fields.size() == 1);
-    ImplicitValueInitForUnion.emplace(Fields.front()->getType());
-    InitsForUnion.push_back(&*ImplicitValueInitForUnion);
-    Inits = InitsForUnion;
-  }
-
-  size_t InitIdx = 0;
-
-  assert(Fields.size() + RD->getNumBases() == Inits.size());
-  for (const CXXBaseSpecifier &Base : RD->bases()) {
-    assert(InitIdx < Inits.size());
-    Expr *Init = Inits[InitIdx++];
-    BaseInits.emplace_back(&Base, Init);
-  }
-
-  assert(Fields.size() == Inits.size() - InitIdx);
-  for (const FieldDecl *Field : Fields) {
-    assert(InitIdx < Inits.size());
-    Expr *Init = Inits[InitIdx++];
-    FieldInits.emplace_back(Field, Init);
-  }
-}
-
 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) {
   auto &NewVal = Env.create<RecordValue>(Loc);
   Env.setValue(Loc, NewVal);
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 88a9c0eccbebc..1e034771014ea 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -20,7 +20,9 @@
 #include "clang/AST/OperationKinds.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
 #include "clang/Analysis/FlowSensitive/RecordOps.h"

From bbd64c4ddf08be468ab4eb4c161e28bdab6808bb Mon Sep 17 00:00:00 2001
From: Philip Reames <preames@rivosinc.com>
Date: Tue, 16 Apr 2024 11:40:23 -0700
Subject: [PATCH 45/58] [RISCV] Add coverage for strength reduction of mul as
 2^N - 2^M

---
 llvm/test/CodeGen/RISCV/mul.ll | 196 ++++++++++++++++++++++++++++++++-
 1 file changed, 192 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index af341dbaadeab..364e8c7b38dac 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -465,6 +465,192 @@ define i32 @mulhu_constant(i32 %a) nounwind {
   ret i32 %4
 }
 
+define i32 @muli32_p14(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p14:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 14
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p14:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 14
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p14:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 14
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p14:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 14
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 14
+  ret i32 %1
+}
+
+define i32 @muli32_p28(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p28:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 28
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p28:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 28
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p28:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 28
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p28:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 28
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 28
+  ret i32 %1
+}
+
+define i32 @muli32_p30(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p30:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 30
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p30:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 30
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p30:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 30
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p30:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 30
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 30
+  ret i32 %1
+}
+
+define i32 @muli32_p56(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p56:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 56
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p56:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 56
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p56:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 56
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p56:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 56
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 56
+  ret i32 %1
+}
+
+define i32 @muli32_p60(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p60:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 60
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p60:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 60
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p60:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 60
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p60:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 60
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 60
+  ret i32 %1
+}
+
+define i32 @muli32_p62(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p62:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 62
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p62:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 62
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p62:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 62
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p62:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 62
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 62
+  ret i32 %1
+}
+
 define i32 @muli32_p65(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p65:
 ; RV32I:       # %bb.0:
@@ -600,6 +786,8 @@ define i64 @muli64_p63(i64 %a) nounwind {
   ret i64 %1
 }
 
+
+
 define i32 @muli32_m63(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_m63:
 ; RV32I:       # %bb.0:
@@ -1145,10 +1333,10 @@ define i128 @muli128_m3840(i128 %a) nounwind {
 ; RV32I-NEXT:    sltu a7, a6, a4
 ; RV32I-NEXT:    sub t0, t1, t0
 ; RV32I-NEXT:    mv t1, a7
-; RV32I-NEXT:    beq a5, a3, .LBB30_2
+; RV32I-NEXT:    beq a5, a3, .LBB36_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu t1, a5, a3
-; RV32I-NEXT:  .LBB30_2:
+; RV32I-NEXT:  .LBB36_2:
 ; RV32I-NEXT:    sub a2, a2, a1
 ; RV32I-NEXT:    sltu a1, a2, t1
 ; RV32I-NEXT:    sub a1, t0, a1
@@ -1261,10 +1449,10 @@ define i128 @muli128_m63(i128 %a) nounwind {
 ; RV32I-NEXT:    slli t0, a1, 6
 ; RV32I-NEXT:    or a7, t0, a7
 ; RV32I-NEXT:    mv t0, a5
-; RV32I-NEXT:    beq a1, a7, .LBB31_2
+; RV32I-NEXT:    beq a1, a7, .LBB37_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu t0, a1, a7
-; RV32I-NEXT:  .LBB31_2:
+; RV32I-NEXT:  .LBB37_2:
 ; RV32I-NEXT:    srli t1, a1, 26
 ; RV32I-NEXT:    slli t2, a6, 6
 ; RV32I-NEXT:    or t1, t2, t1

From 8885813ebb0a61014d99ac776b8118d935848cc9 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 16 Apr 2024 20:45:12 +0200
Subject: [PATCH 46/58] [libc++][chrono] Disables a test.

This test seems problematic on different platforms. There is still a
test that ensures coverage, but in an automatic fashion. This test needs
to be investigated.
---
 .../time.zone.members/get_info.sys_time.pass.cpp             | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp
index a751a2fb6347b..d27cf0bd89062 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp
@@ -6,7 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// UNSUPPORTED: c++03, c++11, c++14, c++17
+// TODO TZDB review the test based on review comments in
+// https://github.com/llvm/llvm-project/pull/85619
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23, c++26
 // UNSUPPORTED: no-filesystem, no-localization, no-tzdb
 
 // XFAIL: libcpp-has-no-incomplete-tzdb

From 0665669876cd7f51f7572cff3bb97485d78f5de5 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 16 Apr 2024 11:49:25 -0700
Subject: [PATCH 47/58] [Sema] Mark alias/ifunc targets used and consider
 mangled names

https://reviews.llvm.org/D54188 marked "alias" targets as used in C to
fix -Wunused false positives. This patch extends the approach to handle
mangled names to support global scope names in C++ and the
`overloadable` attribute in C.

(Note: we should skip `UsingShadowDecl`, which would trigger an
assertion failure in `ItaniumMangleContextImpl::mangleCXXName`.
See regression test added by commit 1c2afbae9af22b58190c10e3517242d01d89d612.)

In addition, we mark ifunc targets as used to fix #63957 (temporarily
used by xz; ifunc was removed by
https://github.com/tukaani-project/xz/commit/689ae2427342a2ea1206eb5ca08301baf410e7e0)

While our approach has false negatives for namespace scope names, the
majority of alias/ifunc C++ uses (global scope with no overloads) are
handled.

Note: The following function with internal linkage but C language
linkage type is mangled in Clang but not in GCC. This inconsistency
makes alias/ifunc difficult to use portably in C++ (#88593).
```
extern "C" {
static void f0() {}
// GCC: void g0() __attribute__((alias("_ZL2f0v")));
// Clang: void g0() __attribute__((alias("f0")));
}
```

Pull Request: https://github.com/llvm/llvm-project/pull/87130
---
 clang/lib/Sema/CMakeLists.txt                 |  1 +
 clang/lib/Sema/SemaDeclAttr.cpp               | 46 ++++++++++++++-----
 clang/test/AST/ast-dump-attr-json.cpp         |  1 +
 clang/test/Sema/alias-unused-win.cpp          |  2 +-
 clang/test/Sema/alias-unused.cpp              | 16 ++++---
 .../llvm-project-overlay/clang/BUILD.bazel    |  1 +
 6 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index ab3b813a9ccd9..a96439df66422 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
   Core
+  Demangle
   FrontendHLSL
   FrontendOpenMP
   MC
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index b7b1fbc625a15..c3bf18a3f79e2 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -45,6 +45,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/Assumptions.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/Error.h"
@@ -1983,6 +1984,38 @@ static void handleWeakRefAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) WeakRefAttr(S.Context, AL));
 }
 
+// Mark alias/ifunc target as used. Due to name mangling, we look up the
+// demangled name ignoring parameters (not supported by microsoftDemangle
+// https://github.com/llvm/llvm-project/issues/88825). This should handle the
+// majority of use cases while leaving namespace scope names unmarked.
+static void markUsedForAliasOrIfunc(Sema &S, Decl *D, const ParsedAttr &AL,
+                                    StringRef Str) {
+  std::unique_ptr<char, llvm::FreeDeleter> Demangled;
+  if (S.getASTContext().getCXXABIKind() != TargetCXXABI::Microsoft)
+    Demangled.reset(llvm::itaniumDemangle(Str, /*ParseParams=*/false));
+  std::unique_ptr<MangleContext> MC(S.Context.createMangleContext());
+  SmallString<256> Name;
+
+  const DeclarationNameInfo Target(
+      &S.Context.Idents.get(Demangled ? Demangled.get() : Str), AL.getLoc());
+  LookupResult LR(S, Target, Sema::LookupOrdinaryName);
+  if (S.LookupName(LR, S.TUScope)) {
+    for (NamedDecl *ND : LR) {
+      if (!isa<FunctionDecl>(ND) && !isa<VarDecl>(ND))
+        continue;
+      if (MC->shouldMangleDeclName(ND)) {
+        llvm::raw_svector_ostream Out(Name);
+        Name.clear();
+        MC->mangleName(GlobalDecl(ND), Out);
+      } else {
+        Name = ND->getIdentifier()->getName();
+      }
+      if (Name == Str)
+        ND->markUsed(S.Context);
+    }
+  }
+}
+
 static void handleIFuncAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   StringRef Str;
   if (!S.checkStringLiteralArgumentAttr(AL, 0, Str))
@@ -1995,6 +2028,7 @@ static void handleIFuncAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
+  markUsedForAliasOrIfunc(S, D, AL, Str);
   D->addAttr(::new (S.Context) IFuncAttr(S.Context, AL, Str));
 }
 
@@ -2029,17 +2063,7 @@ static void handleAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     }
   }
 
-  // Mark target used to prevent unneeded-internal-declaration warnings.
-  if (!S.LangOpts.CPlusPlus) {
-    // FIXME: demangle Str for C++, as the attribute refers to the mangled
-    // linkage name, not the pre-mangled identifier.
-    const DeclarationNameInfo target(&S.Context.Idents.get(Str), AL.getLoc());
-    LookupResult LR(S, target, Sema::LookupOrdinaryName);
-    if (S.LookupQualifiedName(LR, S.getCurLexicalContext()))
-      for (NamedDecl *ND : LR)
-        ND->markUsed(S.Context);
-  }
-
+  markUsedForAliasOrIfunc(S, D, AL, Str);
   D->addAttr(::new (S.Context) AliasAttr(S.Context, AL, Str));
 }
 
diff --git a/clang/test/AST/ast-dump-attr-json.cpp b/clang/test/AST/ast-dump-attr-json.cpp
index 051c2956abfdf..883e584bfedf0 100644
--- a/clang/test/AST/ast-dump-attr-json.cpp
+++ b/clang/test/AST/ast-dump-attr-json.cpp
@@ -46,6 +46,7 @@ __thread __attribute__ ((tls_model ("local-exec"))) int tls_model_var;
 // CHECK-NEXT:    "tokLen": 11
 // CHECK-NEXT:   }
 // CHECK-NEXT:  },
+// CHECK-NEXT:  "isUsed": true,
 // CHECK-NEXT:  "name": "global_decl",
 // CHECK-NEXT:  "mangledName": "global_decl",
 // CHECK-NEXT:  "type": {
diff --git a/clang/test/Sema/alias-unused-win.cpp b/clang/test/Sema/alias-unused-win.cpp
index 47c96d4117517..97d57a3bbd1e3 100644
--- a/clang/test/Sema/alias-unused-win.cpp
+++ b/clang/test/Sema/alias-unused-win.cpp
@@ -7,7 +7,7 @@ extern "C" {
 static int f(void) { return 42; } // cxx-warning{{unused function 'f'}}
 int g(void) __attribute__((alias("f")));
 
-static int foo [] = { 42, 0xDEAD }; // cxx-warning{{variable 'foo' is not needed and will not be emitted}}
+static int foo [] = { 42, 0xDEAD };
 extern typeof(foo) bar __attribute__((unused, alias("foo")));
 
 static int __attribute__((overloadable)) f0(int x) { return x; } // expected-warning{{unused function 'f0'}}
diff --git a/clang/test/Sema/alias-unused.cpp b/clang/test/Sema/alias-unused.cpp
index dc8e46f072d74..c0b541c880e52 100644
--- a/clang/test/Sema/alias-unused.cpp
+++ b/clang/test/Sema/alias-unused.cpp
@@ -14,24 +14,26 @@ extern typeof(foo) bar __attribute__((unused, alias("foo")));
 /// We report a warning in C++ mode because the internal linkage `resolver` gets
 /// mangled as it does not have a language linkage. GCC does not mangle
 /// `resolver` or report a warning.
-static int (*resolver(void))(void) { return f; } // expected-warning{{unused function 'resolver'}}
+static int (*resolver(void))(void) { return f; } // cxx-warning{{unused function 'resolver'}}
 int ifunc(void) __attribute__((ifunc("resolver")));
 
-static int __attribute__((overloadable)) f0(int x) { return x; } // expected-warning{{unused function 'f0'}}
+static int __attribute__((overloadable)) f0(int x) { return x; }
 static float __attribute__((overloadable)) f0(float x) { return x; } // expected-warning{{unused function 'f0'}}
 int g0(void) __attribute__((alias("_ZL2f0i")));
 
 #ifdef __cplusplus
-static int f1() { return 42; } // expected-warning{{unused function 'f1'}}
+static int f1() { return 42; }
 int g1(void) __attribute__((alias("_ZL2f1v")));
 }
 
-static int f2(int) { return 42; } // expected-warning{{unused function 'f2'}}
-static int f2() { return 42; } // expected-warning{{unused function 'f2'}}
+/// We demangle alias/ifunc target and mark all found functions as used.
+
+static int f2(int) { return 42; } // cxx-warning{{unused function 'f2'}}
+static int f2() { return 42; }
 int g2() __attribute__((alias("_ZL2f2v")));
 
-static int (*resolver1())() { return f; } // expected-warning{{unused function 'resolver1'}}
-static int (*resolver1(int))() { return f; } // expected-warning{{unused function 'resolver1'}}
+static int (*resolver1())() { return f; } // cxx-warning{{unused function 'resolver1'}}
+static int (*resolver1(int))() { return f; }
 int ifunc1() __attribute__((ifunc("_ZL9resolver1i")));
 
 /// TODO: We should report "unused function" for f3(int).
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index c2f77e3abca0e..725ac6bb38120 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -1136,6 +1136,7 @@ cc_library(
         "//llvm:AllTargetsAsmParsers",
         "//llvm:AllTargetsCodeGens",
         "//llvm:Core",
+        "//llvm:Demangle",
         "//llvm:FrontendHLSL",
         "//llvm:FrontendOpenMP",
         "//llvm:MC",

From 5422eb0b841521908c2fc60bd9c7fdc11ded12a1 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Tue, 16 Apr 2024 11:50:49 -0700
Subject: [PATCH 48/58] [memprof] Add another constructor to MemProfReader
 (#88952)

This patch enables users of MemProfReader to directly supply mappings
from CallStackId to actual call stacks.

Once the users of the current constructor without CSIdMap switch to
the new constructor, we'll have fewer users of:

- IndexedAllocationInfo::CallStack
- IndexedMemProfRecord::CallSites

bringing us one step closer to the removal of these fields in favor
of:

- IndexedAllocationInfo::CSId
- IndexedMemProfRecord::CallSiteIds
---
 llvm/include/llvm/ProfileData/MemProfReader.h |  9 ++++
 llvm/unittests/ProfileData/MemProfTest.cpp    | 41 +++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 1f84fefad03e3..7fa8af184dc93 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -98,6 +98,15 @@ class MemProfReader {
       llvm::DenseMap<FrameId, Frame> FrameIdMap,
       llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData);
 
+  // Initialize the MemProfReader with the frame mappings, call stack mappings,
+  // and profile contents.
+  MemProfReader(
+      llvm::DenseMap<FrameId, Frame> FrameIdMap,
+      llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdMap,
+      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
+      : IdToFrame(std::move(FrameIdMap)), CSIdToCallStack(std::move(CSIdMap)),
+        FunctionProfileData(std::move(ProfData)) {}
+
 protected:
   // A helper method to extract the frame from the IdToFrame map.
   const Frame &idToFrame(const FrameId Id) const {
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index ab9227e9df881..f596919ed039a 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -436,6 +436,47 @@ TEST(MemProf, BaseMemProfReader) {
               FrameContains("bar", 10U, 2U, false));
 }
 
+TEST(MemProf, BaseMemProfReaderWithCSIdMap) {
+  llvm::DenseMap<FrameId, Frame> FrameIdMap;
+  Frame F1(/*Hash=*/IndexedMemProfRecord::getGUID("foo"), /*LineOffset=*/20,
+           /*Column=*/5, /*IsInlineFrame=*/true);
+  Frame F2(/*Hash=*/IndexedMemProfRecord::getGUID("bar"), /*LineOffset=*/10,
+           /*Column=*/2, /*IsInlineFrame=*/false);
+  FrameIdMap.insert({F1.hash(), F1});
+  FrameIdMap.insert({F2.hash(), F2});
+
+  llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdMap;
+  llvm::SmallVector<FrameId> CallStack = {F1.hash(), F2.hash()};
+  CallStackId CSId = llvm::memprof::hashCallStack(CallStack);
+  CSIdMap.insert({CSId, CallStack});
+
+  llvm::MapVector<llvm::GlobalValue::GUID, IndexedMemProfRecord> ProfData;
+  IndexedMemProfRecord FakeRecord;
+  MemInfoBlock Block;
+  Block.AllocCount = 1U, Block.TotalAccessDensity = 4,
+  Block.TotalLifetime = 200001;
+  FakeRecord.AllocSites.emplace_back(
+      /*CS=*/llvm::SmallVector<FrameId>(),
+      /*CSId=*/llvm::memprof::hashCallStack(CallStack),
+      /*MB=*/Block);
+  ProfData.insert({F1.hash(), FakeRecord});
+
+  MemProfReader Reader(FrameIdMap, CSIdMap, ProfData);
+
+  llvm::SmallVector<MemProfRecord, 1> Records;
+  for (const auto &KeyRecordPair : Reader) {
+    Records.push_back(KeyRecordPair.second);
+  }
+
+  ASSERT_THAT(Records, SizeIs(1));
+  ASSERT_THAT(Records[0].AllocSites, SizeIs(1));
+  ASSERT_THAT(Records[0].AllocSites[0].CallStack, SizeIs(2));
+  EXPECT_THAT(Records[0].AllocSites[0].CallStack[0],
+              FrameContains("foo", 20U, 5U, true));
+  EXPECT_THAT(Records[0].AllocSites[0].CallStack[1],
+              FrameContains("bar", 10U, 2U, false));
+}
+
 TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
   // Verify that MemProfRecord can be constructed from IndexedMemProfRecord with
   // CallStackIds only.

From c7657cf7d1ee57f9cb9133164536591a1842b43c Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Tue, 16 Apr 2024 14:54:06 -0400
Subject: [PATCH 49/58] [SLP]Keep externally used GEPs as GEPs, if possible
 instead of extractelement.

If the vectorized GEP instruction can still be kept as a scalar GEP, it
is better to keep it as a scalar GEP instead of an extractelement. In
many cases it is more profitable.

Metric: size..text

Program                                                                          size..text
                                                                                 results     results0    diff
                        test-suite :: SingleSource/Benchmarks/Misc/oourafft.test    18911.00    19695.00  4.1%
                   test-suite :: SingleSource/Benchmarks/Misc-C++-EH/spirit.test    59987.00    60707.00  1.2%
       test-suite :: External/SPEC/CFP2017speed/638.imagick_s/638.imagick_s.test  1392209.00  1392753.00  0.0%
        test-suite :: External/SPEC/CFP2017rate/538.imagick_r/538.imagick_r.test  1392209.00  1392753.00  0.0%
           test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test  1087996.00  1088236.00  0.0%
                         test-suite :: MultiSource/Benchmarks/Bullet/bullet.test   309310.00   309342.00  0.0%
             test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test   664661.00   664693.00  0.0%
            test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test   664661.00   664693.00  0.0%
        test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12354636.00 12354908.00  0.0%
                  test-suite :: External/SPEC/CFP2006/453.povray/453.povray.test  1152748.00  1152716.00 -0.0%
                       test-suite :: MultiSource/Applications/oggenc/oggenc.test   191787.00   191771.00 -0.0%
                     test-suite :: SingleSource/UnitTests/matrix-types-spec.test   480796.00   480476.00 -0.1%

Misc/oourafft - Extra code gets vectorized
Misc-C++-EH/spirit - same
CFP2017speed/638.imagick_s
CFP2017rate/538.imagick_r - same, extra code gets vectorized
CINT2006/400.perlbench - some extra 4 x ptr stores vectorized
Bullet/bullet - extra 4 x ptr store vectorized
CINT2017rate/525.x264_r
CINT2017speed/625.x264_s - same
CFP2017rate/526.blender_r - extra 8 x float stores (several), some extra
4 x ptr stores
CFP2006/453.povray - 2 x double loads/stores replaced by 4 x double
loads/stores
Applications/oggenc - extra code is vectorized
UnitTests/matrix-types-spec - extra code gets vectorized

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/88877
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 50 ++++++++++++++++++-
 .../SLPVectorizer/X86/extract_in_tree_user.ll |  4 +-
 .../SLPVectorizer/X86/geps-non-pow-2.ll       | 17 ++++---
 .../SLPVectorizer/X86/opaque-ptr.ll           | 19 +++----
 .../X86/reorder-reused-masked-gather2.ll      |  2 +-
 .../SLPVectorizer/X86/stacksave-dependence.ll |  4 +-
 6 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0cd3ca32933ca..7694627c3b043 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1134,6 +1134,7 @@ class BoUpSLP {
     MustGather.clear();
     EntryToLastInstruction.clear();
     ExternalUses.clear();
+    ExternalUsesAsGEPs.clear();
     for (auto &Iter : BlocksSchedules) {
       BlockScheduling *BS = Iter.second.get();
       BS->clear();
@@ -3154,6 +3155,10 @@ class BoUpSLP {
   /// after vectorization.
   UserList ExternalUses;
 
+  /// A list of GEPs which can be replaced by scalar GEPs instead of
+  /// extractelement instructions.
+  SmallPtrSet<Value *, 4> ExternalUsesAsGEPs;
+
   /// Values used only by @llvm.assume calls.
   SmallPtrSet<const Value *, 32> EphValues;
 
@@ -5541,6 +5546,7 @@ void BoUpSLP::buildExternalUses(
                           << FoundLane << " from " << *Scalar << ".\n");
         ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
         ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
+        continue;
       }
       for (User *U : Scalar->users()) {
         LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -9925,6 +9931,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
   SmallVector<APInt> DemandedElts;
   SmallDenseSet<Value *, 4> UsedInserts;
   DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
+  std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
   for (ExternalUser &EU : ExternalUses) {
     // We only add extract cost once for the same scalar.
     if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
@@ -10033,12 +10040,40 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
         }
       }
     }
+    // Leave the GEPs as is, they are free in most cases and better to keep them
+    // as GEPs.
+    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(EU.Scalar)) {
+      if (!ValueToExtUses) {
+        ValueToExtUses.emplace();
+        for_each(enumerate(ExternalUses), [&](const auto &P) {
+          ValueToExtUses->try_emplace(P.value().Scalar, P.index());
+        });
+      }
+      // Can use original GEP, if no operands vectorized or they are marked as
+      // externally used already.
+      bool CanBeUsedAsGEP = all_of(GEP->operands(), [&](Value *V) {
+        if (!getTreeEntry(V))
+          return true;
+        auto It = ValueToExtUses->find(V);
+        if (It != ValueToExtUses->end()) {
+          // Replace all uses to avoid compiler crash.
+          ExternalUses[It->second].User = nullptr;
+          return true;
+        }
+        return false;
+      });
+      if (CanBeUsedAsGEP) {
+        ExtractCost += TTI->getInstructionCost(GEP, CostKind);
+        ExternalUsesAsGEPs.insert(EU.Scalar);
+        continue;
+      }
+    }
 
     // If we plan to rewrite the tree in a smaller type, we will need to sign
     // extend the extracted value back to the original type. Here, we account
     // for the extract and the added cost of the sign extend if needed.
     auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
-    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     auto It = MinBWs.find(getTreeEntry(EU.Scalar));
     if (It != MinBWs.end()) {
       auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
@@ -13161,6 +13196,8 @@ Value *BoUpSLP::vectorizeTree(
       if (Scalar->getType() != Vec->getType()) {
         Value *Ex = nullptr;
         Value *ExV = nullptr;
+        auto *GEP = dyn_cast<GetElementPtrInst>(Scalar);
+        bool ReplaceGEP = GEP && ExternalUsesAsGEPs.contains(GEP);
         auto It = ScalarToEEs.find(Scalar);
         if (It != ScalarToEEs.end()) {
           // No need to emit many extracts, just move the only one in the
@@ -13186,6 +13223,15 @@ Value *BoUpSLP::vectorizeTree(
             if (const TreeEntry *ETE = getTreeEntry(V))
               V = ETE->VectorizedValue;
             Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
+          } else if (ReplaceGEP) {
+            // Leave the GEPs as is, they are free in most cases and better to
+            // keep them as GEPs.
+            auto *CloneGEP = GEP->clone();
+            CloneGEP->insertBefore(*Builder.GetInsertBlock(),
+                                   Builder.GetInsertPoint());
+            if (GEP->hasName())
+              CloneGEP->takeName(GEP);
+            Ex = CloneGEP;
           } else {
             Ex = Builder.CreateExtractElement(Vec, Lane);
           }
@@ -13224,6 +13270,8 @@ Value *BoUpSLP::vectorizeTree(
       assert((ExternallyUsedValues.count(Scalar) ||
               any_of(Scalar->users(),
                      [&](llvm::User *U) {
+                       if (ExternalUsesAsGEPs.contains(U))
+                         return true;
                        TreeEntry *UseEntry = getTreeEntry(U);
                        return UseEntry &&
                               (UseEntry->State == TreeEntry::Vectorize ||
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
index 096f57d100a50..c600d75ed1e8c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -13,7 +13,7 @@ define i32 @fn1() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 11, i64 56>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
 ; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    ret i32 undef
@@ -92,7 +92,7 @@ define void @externally_used_ptrs() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 56, i64 11>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[TMP5]], [[TMP6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
index aa67974358306..e459cd8c6955b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
@@ -13,25 +13,26 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK:       while.body:
 ; CHECK-NEXT:    [[C_022:%.*]] = phi ptr [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[C_022]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP9]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 1, i64 1>
 ; CHECK-NEXT:    switch i32 [[TMP3]], label [[WHILE_BODY_BACKEDGE]] [
-; CHECK-NEXT:    i32 2, label [[SW_BB:%.*]]
-; CHECK-NEXT:    i32 4, label [[SW_BB6:%.*]]
+; CHECK-NEXT:      i32 2, label [[SW_BB:%.*]]
+; CHECK-NEXT:      i32 4, label [[SW_BB6:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb:
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 1
-; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2
+; CHECK-NEXT:    store i32 [[TMP7]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
 ; CHECK-NEXT:    br label [[WHILE_BODY_BACKEDGE]]
 ; CHECK:       sw.bb6:
 ; CHECK-NEXT:    [[INCDEC_PTR8:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
@@ -39,7 +40,7 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP13]], align 4
 ; CHECK-NEXT:    br label [[WHILE_BODY_BACKEDGE]]
 ; CHECK:       while.body.backedge:
-; CHECK-NEXT:    [[C_022_BE]] = phi ptr [ [[INCDEC_PTR]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ]
+; CHECK-NEXT:    [[C_022_BE]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ]
 ; CHECK-NEXT:    [[TMP14]] = phi <2 x ptr> [ [[TMP4]], [[WHILE_BODY]] ], [ [[TMP12]], [[SW_BB6]] ], [ [[TMP8]], [[SW_BB]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY]]
 ; CHECK:       while.end:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
index 3801fa5c787b6..c40be9690cce1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
@@ -52,17 +52,14 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 {
 
 define void @test2(ptr %a, ptr %b) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 1
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 2
-; CHECK-NEXT:    [[I1:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT:    [[B3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 3
-; CHECK-NEXT:    [[I2:%.*]] = ptrtoint ptr [[B3]] to i64
-; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A1]], align 8
-; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 8
-; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[I1]], [[V1]]
-; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[I2]], [[V2]]
-; CHECK-NEXT:    store i64 [[ADD1]], ptr [[A1]], align 8
-; CHECK-NEXT:    store i64 [[ADD2]], ptr [[A2]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 1, i64 3>
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr [[A1]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[A1]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a1 = getelementptr inbounds i64, ptr %a, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll
index ddc2a1b819041..30f328293cdaa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll
@@ -9,7 +9,7 @@ define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP3]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 28, i64 24>
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr addrspace(1)> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 2, i32 1, i32 2, i32 1>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
index 0125e5fab089b..e93c5244dfbe2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
@@ -35,7 +35,7 @@ define void @allocas(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
 ; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
@@ -127,7 +127,7 @@ define void @stacksave2(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
 ; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])

From 7d4e8c1f3bbfe976f4871c9cf953f76d771b0eda Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Tue, 16 Apr 2024 14:55:41 -0400
Subject: [PATCH 50/58] [SLP]Attempt to vectorize long stores, if short one
 failed.

We can try to vectorize long store sequences if the short ones were
unsuccessful because vectorization was not profitable. This should not
increase compile time significantly (the stores are already sorted, so
the complexity is n x log n), but it vectorizes extra code.

Metric: size..text

Program                                                                         size..text
                                                                                results     results0    diff
         test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test  1088012.00  1088236.00  0.0%
                  test-suite :: SingleSource/UnitTests/matrix-types-spec.test   480396.00   480476.00  0.0%
          test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test   664613.00   664661.00  0.0%
         test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test   664613.00   664661.00  0.0%
        test-suite :: External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test  2041105.00  2040961.00 -0.0%
                 test-suite :: MultiSource/Applications/JM/lencod/lencod.test   836563.00   836387.00 -0.0%
                 test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test  1035100.00  1032140.00 -0.3%

In all benchmarks, extra code gets vectorized.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/88563
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 81 ++++++++++++-------
 .../Transforms/SLPVectorizer/X86/pr46983.ll   | 46 +++--------
 2 files changed, 62 insertions(+), 65 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7694627c3b043..8ae38550d3095 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15237,39 +15237,60 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
         Size *= 2;
       });
       unsigned StartIdx = 0;
-      for (unsigned Size : CandidateVFs) {
-        for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
-          ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
-          assert(
-              all_of(
-                  Slice,
-                  [&](Value *V) {
-                    return cast<StoreInst>(V)->getValueOperand()->getType() ==
-                           cast<StoreInst>(Slice.front())
-                               ->getValueOperand()
-                               ->getType();
-                  }) &&
-              "Expected all operands of same type.");
-          if (!VectorizedStores.count(Slice.front()) &&
-              !VectorizedStores.count(Slice.back()) &&
-              TriedSequences.insert(std::make_pair(Slice.front(), Slice.back()))
-                  .second &&
-              vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
-            // Mark the vectorized stores so that we don't vectorize them again.
-            VectorizedStores.insert(Slice.begin(), Slice.end());
-            Changed = true;
-            // If we vectorized initial block, no need to try to vectorize it
-            // again.
-            if (Cnt == StartIdx)
-              StartIdx += Size;
-            Cnt += Size;
-            continue;
+      unsigned Repeat = 0;
+      constexpr unsigned MaxAttempts = 2;
+      while (true) {
+        ++Repeat;
+        for (unsigned Size : CandidateVFs) {
+          for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
+            ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
+            assert(
+                all_of(
+                    Slice,
+                    [&](Value *V) {
+                      return cast<StoreInst>(V)->getValueOperand()->getType() ==
+                             cast<StoreInst>(Slice.front())
+                                 ->getValueOperand()
+                                 ->getType();
+                    }) &&
+                "Expected all operands of same type.");
+            if (!VectorizedStores.count(Slice.front()) &&
+                !VectorizedStores.count(Slice.back()) &&
+                TriedSequences
+                    .insert(std::make_pair(Slice.front(), Slice.back()))
+                    .second &&
+                vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
+              // Mark the vectorized stores so that we don't vectorize them
+              // again.
+              VectorizedStores.insert(Slice.begin(), Slice.end());
+              Changed = true;
+              // If we vectorized initial block, no need to try to vectorize
+              // it again.
+              if (Cnt == StartIdx)
+                StartIdx += Size;
+              Cnt += Size;
+              continue;
+            }
+            ++Cnt;
+          }
+          // Check if the whole array was vectorized already - exit.
+          if (StartIdx >= Operands.size()) {
+            Repeat = MaxAttempts;
+            break;
           }
-          ++Cnt;
         }
-        // Check if the whole array was vectorized already - exit.
-        if (StartIdx >= Operands.size())
+        // Check if tried all attempts or no need for the last attempts at all.
+        if (Repeat >= MaxAttempts)
           break;
+        const unsigned MaxTotalNum = bit_floor(Operands.size() - StartIdx);
+        if (MaxVF >= MaxTotalNum)
+          break;
+        // Last attempt to vectorize max number of elements, if all previous
+        // attempts were unsuccessful because of the cost issues.
+        CandidateVFs.clear();
+        for (unsigned Size = MaxTotalNum; Size > MaxVF; Size /= 2) {
+          CandidateVFs.push_back(Size);
+        }
       }
     }
   };
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
index 75505f632a43f..3deab0975ce76 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
@@ -100,41 +100,17 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
 define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
 ; SSE-LABEL: @store_i64(
 ; SSE-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
-; SSE-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
-; SSE-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-; SSE-NEXT:    [[TMP7:%.*]] = lshr i64 [[TMP6]], 15
-; SSE-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
-; SSE-NEXT:    [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 255
-; SSE-NEXT:    [[TMP10:%.*]] = and i64 [[TMP7]], 4294967295
-; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP10]], i64 255
-; SSE-NEXT:    store i64 [[TMP11]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
-; SSE-NEXT:    [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP4]]
-; SSE-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 15
-; SSE-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
-; SSE-NEXT:    [[TMP17:%.*]] = icmp ult i32 [[TMP16]], 255
-; SSE-NEXT:    [[TMP18:%.*]] = and i64 [[TMP15]], 4294967295
-; SSE-NEXT:    [[TMP19:%.*]] = select i1 [[TMP17]], i64 [[TMP18]], i64 255
-; SSE-NEXT:    store i64 [[TMP19]], ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
-; SSE-NEXT:    [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], [[TMP4]]
-; SSE-NEXT:    [[TMP23:%.*]] = lshr i64 [[TMP22]], 15
-; SSE-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
-; SSE-NEXT:    [[TMP25:%.*]] = icmp ult i32 [[TMP24]], 255
-; SSE-NEXT:    [[TMP26:%.*]] = and i64 [[TMP23]], 4294967295
-; SSE-NEXT:    [[TMP27:%.*]] = select i1 [[TMP25]], i64 [[TMP26]], i64 255
-; SSE-NEXT:    store i64 [[TMP27]], ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
-; SSE-NEXT:    [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP30:%.*]] = mul i64 [[TMP29]], [[TMP4]]
-; SSE-NEXT:    [[TMP31:%.*]] = lshr i64 [[TMP30]], 15
-; SSE-NEXT:    [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
-; SSE-NEXT:    [[TMP33:%.*]] = icmp ult i32 [[TMP32]], 255
-; SSE-NEXT:    [[TMP34:%.*]] = and i64 [[TMP31]], 4294967295
-; SSE-NEXT:    [[TMP35:%.*]] = select i1 [[TMP33]], i64 [[TMP34]], i64 255
-; SSE-NEXT:    store i64 [[TMP35]], ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; SSE-NEXT:    [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
+; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
+; SSE-NEXT:    [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
+; SSE-NEXT:    [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], <i64 15, i64 15, i64 15, i64 15>
+; SSE-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
+; SSE-NEXT:    [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], <i32 255, i32 255, i32 255, i32 255>
+; SSE-NEXT:    [[TMP12:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
+; SSE-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
+; SSE-NEXT:    [[TMP14:%.*]] = zext <4 x i32> [[TMP13]] to <4 x i64>
+; SSE-NEXT:    store <4 x i64> [[TMP14]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @store_i64(

From 3eff86f82cb59d7dfc88e0cc3d8df8282f24f028 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Tue, 16 Apr 2024 18:59:12 +0000
Subject: [PATCH 51/58] [gn build] Port 9ec8c961664d

---
 .../utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn
index 04f20211b3c71..22433459a7878 100644
--- a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn
@@ -23,6 +23,7 @@ static_library("FlowSensitive") {
     target_gen_dir,
   ]
   sources = [
+    "ASTOps.cpp",
     "AdornedCFG.cpp",
     "Arena.cpp",
     "DataflowAnalysisContext.cpp",

From b0ddbfb77d15e00e08fc36f6ccd8a4fecde465d1 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail@justinbogner.com>
Date: Tue, 16 Apr 2024 12:09:32 -0700
Subject: [PATCH 52/58] [clang][SPIR-V] Set AS for the SPIR-V logical triple
 (#88939)

This was missed in #88455, causing most of the .hlsl-to-SPIR-V tests to
fail (such as clang\test\Driver\hlsl-lang-targets-spirv.hlsl).
---
 clang/lib/Basic/Targets/SPIR.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 9a4a8b501460b..44265445ff004 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -315,7 +315,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {
     // SPIR-V IDs are represented with a single 32-bit word.
     SizeType = TargetInfo::UnsignedInt;
     resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
-                    "v96:128-v192:256-v256:256-v512:512-v1024:1024");
+                    "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
   }
 
   void getTargetDefines(const LangOptions &Opts,

From c9731a3dccd381849bfede5e09290c0574efa248 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Tue, 16 Apr 2024 12:10:05 -0700
Subject: [PATCH 53/58] [mlir] Fix a warning about an extraneous semicolon

This patch fixes:

  mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp:58:2: error: extra ';'
  outside of a function is incompatible with C++98
  [-Werror,-Wc++98-compat-extra-semi]
---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 621986c54d492..530c50ef74f7a 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -55,7 +55,7 @@ static int64_t getRankOf(Value val) {
   if (auto ty = llvm::dyn_cast<ShapedType>(type))
     return ty.getRank();
   return 0;
-};
+}
 
 static bool isReadHintOrNone(const CachePolicyAttr &attr) {
   if (!attr)

From 0a789ea8a829da345e46d8224d73b2ddaba6969f Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane@nvidia.com>
Date: Tue, 16 Apr 2024 12:12:25 -0700
Subject: [PATCH 54/58] Fix test from #83124 and #88902

This just replaces an '#include <new>' with a declaration of array
placement new.
---
 clang/test/SemaCXX/PR41441.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/clang/test/SemaCXX/PR41441.cpp b/clang/test/SemaCXX/PR41441.cpp
index 0b012b33fce34..d0f2917e52f21 100644
--- a/clang/test/SemaCXX/PR41441.cpp
+++ b/clang/test/SemaCXX/PR41441.cpp
@@ -1,6 +1,9 @@
 // RUN: %clang --target=x86_64-pc-linux -S -fno-discard-value-names -emit-llvm -o - %s | FileCheck %s
 
-#include <new>
+namespace std {
+  using size_t = decltype(sizeof(int));
+};
+void* operator new[](std::size_t, void*) noexcept;
 
 // CHECK: call void @llvm.memset.p0.i64(ptr align 1 %x, i8 0, i64 8, i1 false)
 // CHECK: call void @llvm.memset.p0.i64(ptr align 16 %x, i8 0, i64 32, i1 false)

From 9a0a28f8384b2cb534953df33bf124f01f0e0d0e Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 16 Apr 2024 14:19:12 -0500
Subject: [PATCH 55/58] [Libomptarget] Rework Record & Replay to be a plugin
 member (#88928)

Summary:
Previously, the R&R support was global state initialized by a global
constructor. This is bad because it prevents us from adequately
constraining the lifetime of the library. Additionally, we want to
minimize the amount of global state floating around.

This patch moves the R&R support into a plugin member like everything
else. This means there will be multiple copies of the R&R implementation
floating around, but this was already the case given the fact that we
currently handle everything with dynamic libraries.
---
 .../common/include/PluginInterface.h          | 11 ++++++
 .../common/src/PluginInterface.cpp            | 39 ++++++++++++-------
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 79e8464bfda5c..7f05464f36c1f 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -45,6 +45,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/Triple.h"
 
+struct RecordReplayTy;
+
 namespace llvm {
 namespace omp {
 namespace target {
@@ -1031,6 +1033,12 @@ struct GenericPluginTy {
     return *RPCServer;
   }
 
+  /// Get a reference to the R&R interface for this plugin.
+  RecordReplayTy &getRecordAndReplay() const {
+    assert(RecordReplay && "R&R not initialized");
+    return *RecordReplay;
+  }
+
   /// Get the OpenMP requires flags set for this plugin.
   int64_t getRequiresFlags() const { return RequiresFlags; }
 
@@ -1220,6 +1228,9 @@ struct GenericPluginTy {
 
   /// The interface between the plugin and the GPU for host services.
   RPCServerTy *RPCServer;
+
+  /// The interface into the record-and-replay functionality.
+  RecordReplayTy *RecordReplay;
 };
 
 namespace Plugin {
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index b5f3c45c835fd..6df9798f12e3d 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -362,8 +362,6 @@ struct RecordReplayTy {
   }
 };
 
-static RecordReplayTy RecordReplay;
-
 // Extract the mapping of host function pointers to device function pointers
 // from the entry table. Functions marked as 'indirect' in OpenMP will have
 // offloading entries generated for them which map the host's function pointer
@@ -473,7 +471,8 @@ GenericKernelTy::getKernelLaunchEnvironment(
   // Ctor/Dtor have no arguments, replaying uses the original kernel launch
   // environment. Older versions of the compiler do not generate a kernel
   // launch environment.
-  if (isCtorOrDtor() || RecordReplay.isReplaying() ||
+  if (isCtorOrDtor() ||
+      GenericDevice.Plugin.getRecordAndReplay().isReplaying() ||
       Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
     return nullptr;
 
@@ -562,6 +561,7 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
 
   // Record the kernel description after we modified the argument count and num
   // blocks/threads.
+  RecordReplayTy &RecordReplay = GenericDevice.Plugin.getRecordAndReplay();
   if (RecordReplay.isRecording()) {
     RecordReplay.saveImage(getName(), getImage());
     RecordReplay.saveKernelInput(getName(), getImage());
@@ -839,9 +839,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
     delete MemoryManager;
   MemoryManager = nullptr;
 
-  if (RecordReplay.isRecordingOrReplaying())
-    RecordReplay.deinit();
-
   if (RPCServer)
     if (auto Err = RPCServer->deinitDevice(*this))
       return Err;
@@ -858,6 +855,7 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
 
   return deinitImpl();
 }
+
 Expected<DeviceImageTy *>
 GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
                             const __tgt_device_image *InputTgtImage) {
@@ -892,7 +890,8 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
     return std::move(Err);
 
   // Setup the global device memory pool if needed.
-  if (!RecordReplay.isReplaying() && shouldSetupDeviceMemoryPool()) {
+  if (!Plugin.getRecordAndReplay().isReplaying() &&
+      shouldSetupDeviceMemoryPool()) {
     uint64_t HeapSize;
     auto SizeOrErr = getDeviceHeapSize(HeapSize);
     if (SizeOrErr) {
@@ -1307,8 +1306,8 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
                                             TargetAllocTy Kind) {
   void *Alloc = nullptr;
 
-  if (RecordReplay.isRecordingOrReplaying())
-    return RecordReplay.alloc(Size);
+  if (Plugin.getRecordAndReplay().isRecordingOrReplaying())
+    return Plugin.getRecordAndReplay().alloc(Size);
 
   switch (Kind) {
   case TARGET_ALLOC_DEFAULT:
@@ -1344,7 +1343,7 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
 
 Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
   // Free is a noop when recording or replaying.
-  if (RecordReplay.isRecordingOrReplaying())
+  if (Plugin.getRecordAndReplay().isRecordingOrReplaying())
     return Plugin::success();
 
   int Res;
@@ -1396,6 +1395,7 @@ Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
                                     ptrdiff_t *ArgOffsets,
                                     KernelArgsTy &KernelArgs,
                                     __tgt_async_info *AsyncInfo) {
+  RecordReplayTy &RecordReplay = Plugin.getRecordAndReplay();
   AsyncInfoWrapperTy AsyncInfoWrapper(
       *this, RecordReplay.isRecordingOrReplaying() ? nullptr : AsyncInfo);
 
@@ -1495,6 +1495,9 @@ Error GenericPluginTy::init() {
   RPCServer = new RPCServerTy(*this);
   assert(RPCServer && "Invalid RPC server");
 
+  RecordReplay = new RecordReplayTy();
+  assert(RecordReplay && "Invalid Record and Replay handler");
+
   return Plugin::success();
 }
 
@@ -1508,6 +1511,9 @@ Error GenericPluginTy::deinit() {
     assert(!Devices[DeviceId] && "Device was not deinitialized");
   }
 
+  if (RecordReplay && RecordReplay->isRecordingOrReplaying())
+    RecordReplay->deinit();
+
   // There is no global handler if no device is available.
   if (GlobalHandler)
     delete GlobalHandler;
@@ -1515,6 +1521,9 @@ Error GenericPluginTy::deinit() {
   if (RPCServer)
     delete RPCServer;
 
+  if (RecordReplay)
+    delete RecordReplay;
+
   // Perform last deinitializations on the plugin.
   return deinitImpl();
 }
@@ -1630,12 +1639,12 @@ int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId,
       isRecord ? RecordReplayTy::RRStatusTy::RRRecording
                : RecordReplayTy::RRStatusTy::RRReplaying;
 
-  if (auto Err = RecordReplay.init(&Device, MemorySize, VAddr, Status,
-                                   SaveOutput, ReqPtrArgOffset)) {
+  if (auto Err = RecordReplay->init(&Device, MemorySize, VAddr, Status,
+                                    SaveOutput, ReqPtrArgOffset)) {
     REPORT("WARNING RR did not intialize RR-properly with %lu bytes"
            "(Error: %s)\n",
            MemorySize, toString(std::move(Err)).data());
-    RecordReplay.setStatus(RecordReplayTy::RRStatusTy::RRDeactivated);
+    RecordReplay->setStatus(RecordReplayTy::RRStatusTy::RRDeactivated);
 
     if (!isRecord) {
       return OFFLOAD_FAIL;
@@ -1984,8 +1993,8 @@ int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size,
   assert(DevicePtr && "Invalid device global's address");
 
   // Save the loaded globals if we are recording.
-  if (RecordReplay.isRecording())
-    RecordReplay.addEntry(Name, Size, *DevicePtr);
+  if (getRecordAndReplay().isRecording())
+    getRecordAndReplay().addEntry(Name, Size, *DevicePtr);
 
   return OFFLOAD_SUCCESS;
 }

From ed7038ef334eaccdd4104053005cab52804fbfad Mon Sep 17 00:00:00 2001
From: Jeremy Kun <jkun@google.com>
Date: Tue, 16 Apr 2024 12:24:19 -0700
Subject: [PATCH 56/58] specify dialect in polynomial docs (#88933)

I figured out how to test this with `make mlir-doc doxygen-mlir`.
---
 mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt
index dd0384d8b79d6..79e739953d7cf 100644
--- a/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Polynomial/IR/CMakeLists.txt
@@ -1,5 +1,5 @@
 add_mlir_dialect(Polynomial polynomial)
-add_mlir_doc(Polynomial PolynomialDialect Polynomial/ -gen-dialect-doc)
+add_mlir_doc(Polynomial PolynomialDialect Polynomial/ -gen-dialect-doc -dialect=polynomial)
 add_mlir_doc(Polynomial PolynomialOps Polynomial/ -gen-op-doc)
 add_mlir_doc(Polynomial PolynomialAttributes Dialects/ -gen-attrdef-doc)
 add_mlir_doc(Polynomial PolynomialTypes Dialects/ -gen-typedef-doc)

From bfdeba4747b05634cb4eb4a6ce9ccbb8b0ff8e79 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 16 Apr 2024 15:26:41 -0400
Subject: [PATCH 57/58] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?=
 =?UTF-8?q?changes=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 llvm/lib/Target/Sparc/SparcInstrInfo.td  | 30 ++++++++++++------------
 llvm/lib/Target/X86/X86InstrCMovSetCC.td |  4 ++--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e792427cca28..4d68f93efeac1 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -693,38 +693,38 @@ let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in {
 }
 
 let rd = 0 in {
-  let Defs = [CPSR] in {
-    def STCSRrr : F3_1<3, 0b110101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [CPSR] in {
+    def STCSRrr : F3_1<3, 0b110101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
                        "st %csr, [$addr]", [], IIC_st>;
-    def STCSRri : F3_2<3, 0b110101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STCSRri : F3_2<3, 0b110101, (outs), (ins (MEMri $rs1, $simm13):$addr),
                        "st %csr, [$addr]", [], IIC_st>;
   }
-  let Defs = [CPQ] in {
-    def STDCQrr : F3_1<3, 0b110110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [CPQ] in {
+    def STDCQrr : F3_1<3, 0b110110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
                        "std %cq, [$addr]", [], IIC_std>;
-    def STDCQri : F3_2<3, 0b110110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STDCQri : F3_2<3, 0b110110, (outs), (ins (MEMri $rs1, $simm13):$addr),
                        "std %cq, [$addr]", [], IIC_std>;
   }
 }
 
 let rd = 0 in {
-  let Defs = [FSR] in {
-    def STFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [FSR] in {
+    def STFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		   "st %fsr, [$addr]", [], IIC_st>;
-    def STFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		   "st %fsr, [$addr]", [], IIC_st>;
   }
-  let Defs = [FQ] in {
-    def STDFQrr : F3_1<3, 0b100110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Defs = [FQ] in {
+    def STDFQrr : F3_1<3, 0b100110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		   "std %fq, [$addr]", [], IIC_std>;
-    def STDFQri : F3_2<3, 0b100110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STDFQri : F3_2<3, 0b100110, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		   "std %fq, [$addr]", [], IIC_std>;
   }
 }
-let rd = 1, Defs = [FSR] in {
-  def STXFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+let rd = 1, mayStore = 1, Uses = [FSR] in {
+  def STXFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		 "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
-  def STXFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+  def STXFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		 "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 27a0c889a4da3..e27aa4115990e 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -58,8 +58,8 @@ let SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
 }
 let SchedRW = [WriteCMOV, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault],
     Predicates = [HasCMOV, HasCF, In64BitMode], mayStore = 1 in
-  def mr : ITy<0x40, MRMDestMemCC, t, (outs t.MemOperand:$dst),
-                (ins t.RegClass:$src1, ccode:$cond),
+  def mr : ITy<0x40, MRMDestMemCC, t, (outs),
+                (ins t.MemOperand:$dst, t.RegClass:$src1, ccode:$cond),
                 "cfcmov${cond}", unaryop_ndd_args, []>, UseEFLAGS, NF;
 }
 

From 8bece5c7ad3dcdf6860f513d9f79c9bf1ce48a23 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 16 Apr 2024 15:28:16 -0400
Subject: [PATCH 58/58] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?=
 =?UTF-8?q?changes=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 llvm/lib/Target/Sparc/SparcInstrInfo.td  | 30 ++++++++++++------------
 llvm/lib/Target/X86/X86InstrCMovSetCC.td |  4 ++--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e792427cca28..4d68f93efeac1 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -693,38 +693,38 @@ let DecoderNamespace = "SparcV8", Predicates = [HasNoV9] in {
 }
 
 let rd = 0 in {
-  let Defs = [CPSR] in {
-    def STCSRrr : F3_1<3, 0b110101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [CPSR] in {
+    def STCSRrr : F3_1<3, 0b110101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
                        "st %csr, [$addr]", [], IIC_st>;
-    def STCSRri : F3_2<3, 0b110101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STCSRri : F3_2<3, 0b110101, (outs), (ins (MEMri $rs1, $simm13):$addr),
                        "st %csr, [$addr]", [], IIC_st>;
   }
-  let Defs = [CPQ] in {
-    def STDCQrr : F3_1<3, 0b110110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [CPQ] in {
+    def STDCQrr : F3_1<3, 0b110110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
                        "std %cq, [$addr]", [], IIC_std>;
-    def STDCQri : F3_2<3, 0b110110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STDCQri : F3_2<3, 0b110110, (outs), (ins (MEMri $rs1, $simm13):$addr),
                        "std %cq, [$addr]", [], IIC_std>;
   }
 }
 
 let rd = 0 in {
-  let Defs = [FSR] in {
-    def STFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Uses = [FSR] in {
+    def STFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		   "st %fsr, [$addr]", [], IIC_st>;
-    def STFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		   "st %fsr, [$addr]", [], IIC_st>;
   }
-  let Defs = [FQ] in {
-    def STDFQrr : F3_1<3, 0b100110, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+  let mayStore = 1, Defs = [FQ] in {
+    def STDFQrr : F3_1<3, 0b100110, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		   "std %fq, [$addr]", [], IIC_std>;
-    def STDFQri : F3_2<3, 0b100110, (outs (MEMri $rs1, $simm13):$addr), (ins),
+    def STDFQri : F3_2<3, 0b100110, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		   "std %fq, [$addr]", [], IIC_std>;
   }
 }
-let rd = 1, Defs = [FSR] in {
-  def STXFSRrr : F3_1<3, 0b100101, (outs (MEMrr $rs1, $rs2):$addr), (ins),
+let rd = 1, mayStore = 1, Uses = [FSR] in {
+  def STXFSRrr : F3_1<3, 0b100101, (outs), (ins (MEMrr $rs1, $rs2):$addr),
 		 "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
-  def STXFSRri : F3_2<3, 0b100101, (outs (MEMri $rs1, $simm13):$addr), (ins),
+  def STXFSRri : F3_2<3, 0b100101, (outs), (ins (MEMri $rs1, $simm13):$addr),
 		 "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 27a0c889a4da3..e27aa4115990e 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -58,8 +58,8 @@ let SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
 }
 let SchedRW = [WriteCMOV, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault],
     Predicates = [HasCMOV, HasCF, In64BitMode], mayStore = 1 in
-  def mr : ITy<0x40, MRMDestMemCC, t, (outs t.MemOperand:$dst),
-                (ins t.RegClass:$src1, ccode:$cond),
+  def mr : ITy<0x40, MRMDestMemCC, t, (outs),
+                (ins t.MemOperand:$dst, t.RegClass:$src1, ccode:$cond),
                 "cfcmov${cond}", unaryop_ndd_args, []>, UseEFLAGS, NF;
 }