From f549176ad976caa3e19edd036df9a7e12770af7c Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Fri, 16 Apr 2021 14:03:36 -0700
Subject: [PATCH 01/52] [funcattrs] Add the maximal set of implied attributes
 to definitions

Have funcattrs expand all implied attributes into the IR. This expands the infrastructure from D100400, but for definitions not declarations this time.

Somewhat subtly, this mostly isn't semantic. Because the accessors did the inference, any client which used the accessor was already getting the stronger result. Clients that directly checked presence of attributes (there are some), will see a stronger result now.

The old behavior can end up quite confusing for two reasons:
* Without this change, we have situations where function-attrs appears to fail when inferring an attribute (as seen by a human reading IR), even though consuming code will see the attribute as implied via the accessors. As a human trying to sanity check test results and study IR for optimization possibilities, this is exceedingly error-prone and confusing. (I'll note that I wasted several hours recently because of this.)
* We can have transforms which trigger without the IR appearing (on inspection) to meet the preconditions. This change doesn't prevent this from happening (as the accessors still involve multiple checks), but it should make it less frequent.

I'd argue in favor of deleting the extra checks out of the accessors after this lands, but I want that in its own review as a) it's purely stylistic, and b) I already know there's some disagreement.

Once this lands, I'm also going to do a cleanup change which will delete some now redundant duplicate predicates in the inference code, but again, that deserves to be a change of its own.

Differential Revision: https://reviews.llvm.org/D100226
---
 clang/test/CodeGenOpenCL/convergent.cl        |  2 +-
 llvm/lib/Transforms/IPO/FunctionAttrs.cpp     | 25 ++++++++-----------
 .../TypeBasedAliasAnalysis/functionattrs.ll   | 12 ++++-----
 llvm/test/CodeGen/AMDGPU/inline-attr.ll       | 12 ++++-----
 llvm/test/Other/cgscc-devirt-iteration.ll     | 10 ++++----
 .../Other/cgscc-iterate-function-mutation.ll  |  6 ++---
 llvm/test/Other/cgscc-observe-devirt.ll       |  4 +--
 .../FunctionAttrs/2008-09-03-ReadOnly.ll      |  5 ++--
 llvm/test/Transforms/FunctionAttrs/atomic.ll  |  4 +--
 .../FunctionAttrs/incompatible_fn_attrs.ll    |  2 +-
 .../FunctionAttrs/nofree-attributor.ll        | 10 ++++----
 llvm/test/Transforms/FunctionAttrs/nofree.ll  |  2 +-
 llvm/test/Transforms/FunctionAttrs/nosync.ll  | 12 ++++-----
 .../test/Transforms/FunctionAttrs/nounwind.ll |  6 ++---
 llvm/test/Transforms/FunctionAttrs/optnone.ll |  2 +-
 .../FunctionAttrs/willreturn-callsites.ll     |  2 +-
 .../Transforms/FunctionAttrs/writeonly.ll     |  2 +-
 .../InferFunctionAttrs/norecurse_debug.ll     |  2 +-
 llvm/test/Transforms/Inline/cgscc-update.ll   | 12 ++++-----
 19 files changed, 64 insertions(+), 68 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl
index 25951a64c114..1905d7dd81aa 100644
--- a/clang/test/CodeGenOpenCL/convergent.cl
+++ b/clang/test/CodeGenOpenCL/convergent.cl
@@ -134,7 +134,7 @@ kernel void assume_convergent_asm()
   __asm__ volatile("s_barrier");
 }
 
-// CHECK: attributes #0 = { nofree noinline norecurse nounwind willreturn "
+// CHECK: attributes #0 = { nofree noinline norecurse nounwind willreturn mustprogress "
 // CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index cfd302a536c6..0202046158b1 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -57,6 +57,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <iterator>
 #include <map>
@@ -1556,21 +1557,7 @@ static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
         ++NumNoSync;
       },
       /* RequiresExactDefinition= */ true});
-  bool Changed = AI.run(SCCNodes);
-
-  // readnone + not convergent implies nosync
-  // (This is here so that we don't have to duplicate the function local
-  //  memory reasoning of the readnone analysis.)
-  for (Function *F : SCCNodes) {
-    if (!F || F->hasNoSync())
-      continue;
-    if (!F->doesNotAccessMemory() || F->isConvergent())
-      continue;
-    F->setNoSync();
-    NumNoSync++;
-    Changed = true;
-  }
-  return Changed;
+  return AI.run(SCCNodes);
 }
 
 static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
@@ -1630,6 +1617,14 @@ static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
 
   Changed |= addNoSyncAttr(Nodes.SCCNodes);
 
+  // Finally, infer the maximal set of attributes from the ones we've inferred
+  // above.  This is handling the cases where one attribute on a signature
+  // implies another, but for implementation reasons the inference rule for
+  // the latter is missing (or simply less sophisticated).
+  for (Function *F : Nodes.SCCNodes)
+    if (F)
+      Changed |= inferAttributesFromOthers(*F);
+
   return Changed;
 }
 
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 61a46337898f..06a3004dd8ee 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -72,13 +72,13 @@ define i32 @test3_no(i8* %p) nounwind {
 declare void @callee(i32* %p) nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind
 
-; CHECK: attributes #0 = { norecurse nosync nounwind readnone willreturn }
-; CHECK: attributes #1 = { nofree norecurse nosync nounwind willreturn writeonly }
-; CHECK: attributes #2 = { nounwind readonly }
+; CHECK: attributes #0 = { nofree norecurse nosync nounwind readnone willreturn mustprogress }
+; CHECK: attributes #1 = { nofree norecurse nosync nounwind willreturn  writeonly mustprogress }
+; CHECK: attributes #2 = { nofree nounwind readonly }
 ; CHECK: attributes #3 = { nounwind }
-; CHECK: attributes #4 = { nosync nounwind readnone willreturn }
-; CHECK: attributes #5 = { nofree nosync nounwind willreturn }
-; CHECK: attributes #6 = { nofree norecurse nosync nounwind willreturn }
+; CHECK: attributes #4 = { nofree nosync nounwind readnone willreturn mustprogress }
+; CHECK: attributes #5 = { nofree nosync nounwind willreturn mustprogress }
+; CHECK: attributes #6 = { nofree norecurse nosync nounwind willreturn mustprogress }
 ; CHECK: attributes #7 = { argmemonly nofree nosync nounwind willreturn }
 
 ; Root note.
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
index acf04a95db3c..16e3e5c578fe 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -6,14 +6,14 @@
 ; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
 ; GCN: %mul.i = fmul float %load, 1.500000e+01
 
-; UNSAFE: attributes #0 = { norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" }
-; UNSAFE: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #0 = { nofree norecurse nosync nounwind readnone willreturn mustprogress "unsafe-fp-math"="true" }
+; UNSAFE: attributes #1 = { nofree norecurse nosync nounwind willreturn mustprogress "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
 
-; NOINFS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" }
-; NOINFS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
+; NOINFS: attributes #0 = { nofree norecurse nosync nounwind readnone willreturn mustprogress "no-infs-fp-math"="true" }
+; NOINFS: attributes #1 = { nofree norecurse nosync nounwind willreturn mustprogress "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
 
-; NONANS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" }
-; NONANS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
+; NONANS: attributes #0 = { nofree norecurse nosync nounwind readnone willreturn mustprogress "no-nans-fp-math"="true" }
+; NONANS: attributes #1 = { nofree norecurse nosync nounwind willreturn mustprogress "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
 
 define float @foo(float %x) #0 {
 entry:
diff --git a/llvm/test/Other/cgscc-devirt-iteration.ll b/llvm/test/Other/cgscc-devirt-iteration.ll
index 092c624442db..27892e85cec7 100644
--- a/llvm/test/Other/cgscc-devirt-iteration.ll
+++ b/llvm/test/Other/cgscc-devirt-iteration.ll
@@ -28,7 +28,7 @@ declare void @unknown()
 
 define void @test1() {
 ; BEFORE-NOT: Function Attrs
-; AFTER: Function Attrs: nosync readnone
+; AFTER: Function Attrs: nofree nosync readnone
 ; CHECK-LABEL: define void @test1()
 entry:
   %fptr = alloca void ()*
@@ -56,8 +56,8 @@ declare void @readnone_with_arg(void ()**) readnone
 
 define void @test2_a(void ()** %ignore) {
 ; BEFORE-NOT: Function Attrs
-; AFTER1: Function Attrs: readonly
-; AFTER2: Function Attrs: nosync readnone
+; AFTER1: Function Attrs: nofree readonly
+; AFTER2: Function Attrs: nofree nosync readnone
 ; BEFORE: define void @test2_a(void ()** %ignore)
 ; AFTER: define void @test2_a(void ()** readnone %ignore)
 entry:
@@ -77,8 +77,8 @@ entry:
 
 define void @test2_b() {
 ; BEFORE-NOT: Function Attrs
-; AFTER1: Function Attrs: readonly
-; AFTER2: Function Attrs: nosync readnone
+; AFTER1: Function Attrs: nofree readonly
+; AFTER2: Function Attrs: nofree nosync readnone
 ; CHECK-LABEL: define void @test2_b()
 entry:
   %f2ptr = alloca void ()*
diff --git a/llvm/test/Other/cgscc-iterate-function-mutation.ll b/llvm/test/Other/cgscc-iterate-function-mutation.ll
index 470f9055ced7..2075a06a756b 100644
--- a/llvm/test/Other/cgscc-iterate-function-mutation.ll
+++ b/llvm/test/Other/cgscc-iterate-function-mutation.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(simplify-cfg))' -S < %s | FileCheck %s
 
-declare void @readnone() nosync readnone
+declare void @readnone() nofree nosync readnone
 declare void @unknown()
-declare void @reference_function_pointer(void()*) nosync readnone
+declare void @reference_function_pointer(void()*) nofree nosync readnone
 
 ; The @test1_* set of functions checks that when we mutate functions with
 ; simplify-cfg to delete call edges and this ends up splitting both the SCCs
@@ -338,4 +338,4 @@ exit:
   ret void
 }
 
-; CHECK: attributes #0 = { nosync readnone }
+; CHECK: attributes #0 = { nofree nosync readnone }
diff --git a/llvm/test/Other/cgscc-observe-devirt.ll b/llvm/test/Other/cgscc-observe-devirt.ll
index 67d630b23a33..6a6168ca49d4 100644
--- a/llvm/test/Other/cgscc-observe-devirt.ll
+++ b/llvm/test/Other/cgscc-observe-devirt.ll
@@ -10,7 +10,7 @@
 ; without requiring the outer manager to iterate doesn't break any invariant.
 ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER
 
-declare void @readnone() nosync readnone
+declare void @readnone() nofree nosync readnone
 declare void @unknown()
 
 ; The @test1_* checks that if we refine an indirect call to a direct call and
@@ -103,4 +103,4 @@ define void @test2_b3() {
   ret void
 }
 
-; CHECK: attributes #0 = { nosync readnone }
+; CHECK: attributes #0 = { nofree nosync readnone }
diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
index 4ea6fdc87dfa..cde17f48a0fe 100644
--- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
+++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
@@ -8,7 +8,8 @@ entry:
   ret i32 %tmp
 }
 
-; CHECK: declare i32 @e() #0
+; CHECK: declare i32 @e() #1
 declare i32 @e() readonly
 
-; CHECK: attributes #0 = { readonly }
+; CHECK: attributes #0 = { nofree readonly }
+; CHECK: attributes #1 = { readonly }
diff --git a/llvm/test/Transforms/FunctionAttrs/atomic.ll b/llvm/test/Transforms/FunctionAttrs/atomic.ll
index 3208595684fc..d8f801081b7b 100644
--- a/llvm/test/Transforms/FunctionAttrs/atomic.ll
+++ b/llvm/test/Transforms/FunctionAttrs/atomic.ll
@@ -20,5 +20,5 @@ entry:
   ret i32 %r
 }
 
-; CHECK: attributes #0 = { norecurse nosync nounwind readnone ssp uwtable willreturn }
-; CHECK: attributes #1 = { nofree norecurse nounwind ssp uwtable willreturn }
+; CHECK: attributes #0 = { nofree norecurse nosync nounwind readnone ssp uwtable willreturn mustprogress }
+; CHECK: attributes #1 = { nofree norecurse nounwind ssp uwtable willreturn mustprogress }
diff --git a/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll b/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
index e913aca20c58..d8e2db15691a 100644
--- a/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
+++ b/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll
@@ -28,5 +28,5 @@ entry:
 attributes #0 = { argmemonly }
 attributes #1 = { inaccessiblememonly }
 attributes #2 = { inaccessiblemem_or_argmemonly }
-; CHECK: attributes #0 = { norecurse nosync nounwind readnone willreturn }
+; CHECK: attributes #0 = { nofree norecurse nosync nounwind readnone willreturn mustprogress }
 ; CHECK-NOT: attributes
diff --git a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll
index 73befa3a0a99..41c19870ba77 100644
--- a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll
@@ -12,7 +12,7 @@ declare void @_ZdaPv(i8*) local_unnamed_addr #2
 
 
 ; TEST 1 (positive case)
-; FNATTR: Function Attrs: noinline norecurse nosync nounwind readnone uwtable
+; FNATTR: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable
 ; FNATTR-NEXT: define void @only_return()
 define void @only_return() #0 {
     ret void
@@ -78,14 +78,14 @@ end:
 ; }
 
 
-; FNATTR: Function Attrs: noinline nosync nounwind readnone uwtable
+; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; FNATTR-NEXT: define void @mutual_recursion1()
 define void @mutual_recursion1() #0 {
   call void @mutual_recursion2()
   ret void
 }
 
-; FNATTR: Function Attrs: noinline nosync nounwind readnone uwtable
+; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; FNATTR-NEXT: define void @mutual_recursion2()
 define void @mutual_recursion2() #0 {
   call void @mutual_recursion1()
@@ -132,7 +132,7 @@ define noalias i8* @call_realloc(i8* nocapture %0, i64 %1) local_unnamed_addr #0
 ; FNATTR-NEXT: declare void @nofree_function()
 declare void @nofree_function() nofree readnone #0
 
-; FNATTR: Function Attrs: noinline nosync nounwind readnone uwtable
+; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; FNATTR-NEXT: define void @call_nofree_function()
 define void @call_nofree_function() #0 {
     tail call void @nofree_function()
@@ -168,7 +168,7 @@ define void @call_both() #0 {
 
 ; TEST 10 (positive case)
 ; Call intrinsic function
-; FNATTRS: Function Attrs: noinline nosync readnone speculatable
+; FNATTRS: Function Attrs: nofree noinline nosync readnone speculatable
 ; FNATTRS-NEXT: declare float @llvm.floor.f32(float %0)
 declare float @llvm.floor.f32(float)
 
diff --git a/llvm/test/Transforms/FunctionAttrs/nofree.ll b/llvm/test/Transforms/FunctionAttrs/nofree.ll
index a51f8468a56e..16e8bc25a5c3 100644
--- a/llvm/test/Transforms/FunctionAttrs/nofree.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nofree.ll
@@ -36,7 +36,7 @@ entry:
 declare void @free(i8* nocapture) local_unnamed_addr #2
 
 define i32 @_Z4foo3Pi(i32* nocapture readonly %a) local_unnamed_addr #3 {
-; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readonly uwtable willreturn
 ; CHECK-LABEL: @_Z4foo3Pi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll
index aed1f3669afc..5247ac5fa90d 100644
--- a/llvm/test/Transforms/FunctionAttrs/nosync.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll
@@ -6,7 +6,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Base case, empty function
 define void @test1() {
-; CHECK: Function Attrs: norecurse nosync nounwind readnone willreturn
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:    ret void
 ;
@@ -15,7 +15,7 @@ define void @test1() {
 
 ; Show the bottom up walk
 define void @test2() {
-; CHECK: Function Attrs: norecurse nosync nounwind readnone willreturn
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:    call void @test1()
 ; CHECK-NEXT:    ret void
@@ -38,7 +38,7 @@ define void @test3() convergent {
 }
 
 define i32 @test4(i32 %a, i32 %b) {
-; CHECK: Function Attrs: norecurse nosync nounwind readnone willreturn
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    ret i32 [[A]]
@@ -137,7 +137,7 @@ define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable
 }
 
 define i32 @load_unordered(i32* nocapture readonly %0) norecurse nounwind uwtable {
-; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readonly uwtable willreturn
 ; CHECK-LABEL: @load_unordered(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load atomic i32, i32* [[TMP0:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret i32 [[TMP2]]
@@ -271,7 +271,7 @@ declare void @readnone_test() convergent readnone
 
 ; negative. Convergent
 define void @convergent_readnone(){
-; CHECK: Function Attrs: nosync readnone
+; CHECK: Function Attrs: nofree nosync readnone
 ; CHECK-LABEL: @convergent_readnone(
 ; CHECK-NEXT:    call void @readnone_test()
 ; CHECK-NEXT:    ret void
@@ -299,7 +299,7 @@ define void @i_totally_sync() {
 declare float @llvm.cos(float %val) readnone
 
 define float @cos_test(float %x) {
-; CHECK: Function Attrs: nosync nounwind readnone willreturn
+; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn
 ; CHECK-LABEL: @cos_test(
 ; CHECK-NEXT:    [[C:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[C]]
diff --git a/llvm/test/Transforms/FunctionAttrs/nounwind.ll b/llvm/test/Transforms/FunctionAttrs/nounwind.ll
index 6a667cf73b1e..02c1bb4fd153 100644
--- a/llvm/test/Transforms/FunctionAttrs/nounwind.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nounwind.ll
@@ -1,14 +1,14 @@
 ; RUN: opt < %s -function-attrs -S | FileCheck %s
 
 ; TEST 1
-; CHECK: Function Attrs: norecurse nosync nounwind readnone
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone
 ; CHECK-NEXT: define i32 @foo1()
 define i32 @foo1() {
   ret i32 1
 }
 
 ; TEST 2
-; CHECK: Function Attrs: nosync nounwind readnone
+; CHECK: Function Attrs: nofree nosync nounwind readnone
 ; CHECK-NEXT: define i32 @scc1_foo()
 define i32 @scc1_foo() {
   %1 = call i32 @scc1_bar()
@@ -17,7 +17,7 @@ define i32 @scc1_foo() {
 
 
 ; TEST 3
-; CHECK: Function Attrs: nosync nounwind readnone
+; CHECK: Function Attrs: nofree nosync nounwind readnone
 ; CHECK-NEXT: define i32 @scc1_bar()
 define i32 @scc1_bar() {
   %1 = call i32 @scc1_foo()
diff --git a/llvm/test/Transforms/FunctionAttrs/optnone.ll b/llvm/test/Transforms/FunctionAttrs/optnone.ll
index 850142762140..57b9b82291dd 100644
--- a/llvm/test/Transforms/FunctionAttrs/optnone.ll
+++ b/llvm/test/Transforms/FunctionAttrs/optnone.ll
@@ -20,6 +20,6 @@ declare i8 @strlen(i8*) noinline optnone
 ; CHECK: (i8*) #1
 
 ; CHECK-LABEL: attributes #0
-; CHECK: = { norecurse nosync nounwind readnone willreturn }
+; CHECK: = { nofree norecurse nosync nounwind readnone willreturn mustprogress }
 ; CHECK-LABEL: attributes #1
 ; CHECK: = { noinline optnone }
diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll
index 6c9cef7c9b6e..10fd6930d4e5 100644
--- a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll
+++ b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll
@@ -38,7 +38,7 @@ define void @test_fn_willreturn(i32* %ptr) willreturn {
 }
 
 define void @test_fn_mustprogress_readonly_calls(i32* %ptr) mustprogress {
-; CHECK: Function Attrs: readonly willreturn mustprogress
+; CHECK: Function Attrs: nofree readonly willreturn mustprogress
 ; CHECK-LABEL: @test_fn_mustprogress_readonly_calls(
 ; CHECK-NOT:     call void @decl_readonly() #
 ; CHECK-NOT:     call void @decl_readnone() #
diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll
index 1efea78ba1e3..1aaae3a275f6 100644
--- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll
+++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll
@@ -27,4 +27,4 @@ nouses-argworn-funwo_entry:
 
 ; CHECK: attributes #0 = { {{.*}} readnone {{.*}} }
 ; CHECK: attributes #1 = { {{.*}} readonly {{.*}} }
-; CHECK: attributes #2 = { {{.*}} writeonly }
+; CHECK: attributes #2 = { {{.*}} writeonly {{.*}} }
diff --git a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
index 6b50b4870c5d..6b475103dade 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
@@ -52,5 +52,5 @@ attributes #1 = { nounwind readnone speculatable }
 !28 = !DILocation(line: 9, column: 18, scope: !2)
 !29 = !DILocation(line: 10, column: 1, scope: !2)
 
-; CHECK: attributes #0 = { nofree norecurse nosync nounwind willreturn }
+; CHECK: attributes #0 = { nofree norecurse nosync nounwind willreturn mustprogress }
 ; CHECK-NOT: foo.coefficient1
diff --git a/llvm/test/Transforms/Inline/cgscc-update.ll b/llvm/test/Transforms/Inline/cgscc-update.ll
index 024d57a13d8f..5558e9b535ab 100644
--- a/llvm/test/Transforms/Inline/cgscc-update.ll
+++ b/llvm/test/Transforms/Inline/cgscc-update.ll
@@ -27,7 +27,7 @@ entry:
 }
 
 ; This function should have had 'readnone' deduced for its SCC.
-; CHECK: Function Attrs: noinline nosync nounwind readnone
+; CHECK: Function Attrs: nofree noinline nosync nounwind readnone
 ; CHECK-NEXT: define void @test1_g()
 define void @test1_g() noinline {
 entry:
@@ -36,7 +36,7 @@ entry:
 }
 
 ; This function should have had 'readnone' deduced for its SCC.
-; CHECK: Function Attrs: noinline nosync nounwind readnone
+; CHECK: Function Attrs: nofree noinline nosync nounwind readnone
 ; CHECK-NEXT: define void @test1_h()
 define void @test1_h() noinline {
 entry:
@@ -59,7 +59,7 @@ entry:
 }
 
 ; This function should have had 'readnone' deduced for its SCC.
-; CHECK: Function Attrs: noinline nosync nounwind readnone
+; CHECK: Function Attrs: nofree noinline nosync nounwind readnone
 ; CHECK-NEXT: define void @test2_g()
 define void @test2_g() noinline {
 entry:
@@ -69,7 +69,7 @@ entry:
 }
 
 ; This function should have had 'readnone' deduced for its SCC.
-; CHECK: Function Attrs: noinline nosync nounwind readnone
+; CHECK: Function Attrs: nofree noinline nosync nounwind readnone
 ; CHECK-NEXT: define void @test2_h()
 define void @test2_h() noinline {
 entry:
@@ -152,7 +152,7 @@ exit:
 ; form a new SCC and should use that can deduce precise function attrs.
 
 ; This function should have had 'readnone' deduced for its SCC.
-; CHECK: Function Attrs: noinline nosync nounwind readnone
+; CHECK: Function Attrs: nofree noinline nosync nounwind readnone
 ; CHECK-NEXT: define void @test4_f1()
 define void @test4_f1() noinline {
 entry:
@@ -175,7 +175,7 @@ entry:
 }
 
 ; This function should have had 'readnone' deduced for its SCC.
-; CHECK: Function Attrs: noinline nosync nounwind readnone
+; CHECK: Function Attrs: nofree noinline nosync nounwind readnone
 ; CHECK-NEXT: define void @test4_h()
 define void @test4_h() noinline {
 entry:

From 5ebbb366c4a369740c3a3fe1f673e0e8c1902e60 Mon Sep 17 00:00:00 2001
From: "Peyton, Jonathan L" <jonathan.l.peyton@intel.com>
Date: Tue, 23 Mar 2021 14:02:55 -0500
Subject: [PATCH 02/52] [OpenMP] Allow affinity to re-detect for child
 processes

The current atfork() handler for child processes does not reset
the affinity masks array, which prevents users from setting their own
affinity in child processes.

Differential Revision: https://reviews.llvm.org/D99218
---
 openmp/runtime/src/z_Linux_util.cpp           |   2 +
 .../test/affinity/libomp_test_affinity.h      | 131 ++++++++++++++++++
 openmp/runtime/test/affinity/redetect.c       | 101 ++++++++++++++
 3 files changed, 234 insertions(+)
 create mode 100644 openmp/runtime/test/affinity/libomp_test_affinity.h
 create mode 100644 openmp/runtime/test/affinity/redetect.c

diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 4efde5c5591a..caef1406eb0d 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1303,6 +1303,8 @@ static void __kmp_atfork_child(void) {
   if (__kmp_nested_proc_bind.bind_types != NULL) {
     __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
   }
+  __kmp_affinity_masks = NULL;
+  __kmp_affinity_num_masks = 0;
 #endif // KMP_AFFINITY_SUPPORTED
 
 #if KMP_USE_MONITOR
diff --git a/openmp/runtime/test/affinity/libomp_test_affinity.h b/openmp/runtime/test/affinity/libomp_test_affinity.h
new file mode 100644
index 000000000000..1464d9c41414
--- /dev/null
+++ b/openmp/runtime/test/affinity/libomp_test_affinity.h
@@ -0,0 +1,131 @@
+#ifndef LIBOMP_TEST_AFFINITY_H
+#define LIBOMP_TEST_AFFINITY_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+typedef struct affinity_mask_t {
+  size_t setsize;
+  cpu_set_t *set;
+} affinity_mask_t;
+
+#define AFFINITY_MAX_CPUS (32 * 64)
+
+// Operating system affinity mask API
+static void affinity_mask_zero(affinity_mask_t *mask) {
+  CPU_ZERO_S(mask->setsize, mask->set);
+}
+
+static affinity_mask_t *affinity_mask_alloc() {
+  size_t setsize = CPU_ALLOC_SIZE(AFFINITY_MAX_CPUS);
+  cpu_set_t *set = CPU_ALLOC(AFFINITY_MAX_CPUS);
+  affinity_mask_t *retval = (affinity_mask_t *)malloc(sizeof(affinity_mask_t));
+  retval->setsize = setsize;
+  retval->set = set;
+  affinity_mask_zero(retval);
+  return retval;
+}
+
+static void affinity_mask_free(affinity_mask_t *mask) { CPU_FREE(mask->set); }
+
+static void affinity_mask_copy(affinity_mask_t *dest,
+                               const affinity_mask_t *src) {
+  memcpy(dest->set, src->set, dest->setsize);
+}
+
+static void affinity_mask_set(affinity_mask_t *mask, int cpu) {
+  CPU_SET_S(cpu, mask->setsize, mask->set);
+}
+
+static void affinity_mask_clr(affinity_mask_t *mask, int cpu) {
+  CPU_CLR_S(cpu, mask->setsize, mask->set);
+}
+
+static int affinity_mask_isset(const affinity_mask_t *mask, int cpu) {
+  return CPU_ISSET_S(cpu, mask->setsize, mask->set);
+}
+
+static int affinity_mask_count(const affinity_mask_t *mask) {
+  return CPU_COUNT_S(mask->setsize, mask->set);
+}
+
+static int affinity_mask_equal(const affinity_mask_t *mask1,
+                               const affinity_mask_t *mask2) {
+  return CPU_EQUAL_S(mask1->setsize, mask1->set, mask2->set);
+}
+
+static void get_thread_affinity(affinity_mask_t *mask) {
+  if (sched_getaffinity(0, mask->setsize, mask->set) != 0) {
+    perror("sched_getaffinity()");
+    exit(EXIT_FAILURE);
+  }
+}
+
+static void set_thread_affinity(const affinity_mask_t *mask) {
+  if (sched_setaffinity(0, mask->setsize, mask->set) != 0) {
+    perror("sched_setaffinity()");
+    exit(EXIT_FAILURE);
+  }
+}
+
+// Account for an snprintf() call that reported n characters: advance *ptr,
+// shrink *remaining, and add n (the untruncated length) to *retval.
+static void affinity_update_snprintf_values(char **ptr, size_t *remaining,
+                                            size_t n, size_t *retval) {
+  // n >= *remaining: output was truncated or the buffer is already full, so
+  // clamp; subtracting n itself would wrap the unsigned *remaining.
+  size_t advance = (n >= *remaining) ? *remaining : n;
+  *ptr += advance;
+  *remaining -= advance;
+  *retval += n;
+}
+
+static size_t affinity_mask_snprintf(char *buf, size_t bufsize,
+                                     const affinity_mask_t *mask) {
+  int cpu, need_comma, begin, end;
+  size_t n;
+  char *ptr = buf;
+  size_t remaining = bufsize;
+  size_t retval = 0;
+
+  n = snprintf(ptr, remaining, "%c", '{');
+  affinity_update_snprintf_values(&ptr, &remaining, n, &retval);
+
+  need_comma = 0;
+  for (cpu = 0; cpu < AFFINITY_MAX_CPUS; cpu++) {
+    if (!affinity_mask_isset(mask, cpu))
+      continue;
+    if (need_comma) {
+      n = snprintf(ptr, remaining, "%c", ',');
+      affinity_update_snprintf_values(&ptr, &remaining, n, &retval);
+    }
+    begin = cpu;
+    // Find end of range (inclusive end)
+    for (end = begin + 1; end < AFFINITY_MAX_CPUS; ++end) {
+      if (!affinity_mask_isset(mask, end))
+        break;
+    }
+    end--;
+
+    if (end - begin >= 2) {
+      n = snprintf(ptr, remaining, "%d-%d", begin, end);
+      affinity_update_snprintf_values(&ptr, &remaining, n, &retval);
+    } else if (end - begin == 1) {
+      n = snprintf(ptr, remaining, "%d,%d", begin, end);
+      affinity_update_snprintf_values(&ptr, &remaining, n, &retval);
+    } else if (end - begin == 0) {
+      n = snprintf(ptr, remaining, "%d", begin);
+      affinity_update_snprintf_values(&ptr, &remaining, n, &retval);
+    }
+    need_comma = 1;
+    cpu = end;
+  }
+  n = snprintf(ptr, remaining, "%c", '}');
+  affinity_update_snprintf_values(&ptr, &remaining, n, &retval);
+  return retval;
+}
+#endif
diff --git a/openmp/runtime/test/affinity/redetect.c b/openmp/runtime/test/affinity/redetect.c
new file mode 100644
index 000000000000..dba83b72cc42
--- /dev/null
+++ b/openmp/runtime/test/affinity/redetect.c
@@ -0,0 +1,101 @@
+// RUN: %libomp-compile
+// RUN: env KMP_AFFINITY=none %libomp-run
+// REQUIRES: linux
+
+// Check if forked child process resets affinity properly by restricting
+// child's affinity to a subset of the parent and then checking it after
+// a parallel region
+
+#define _GNU_SOURCE
+#include "libomp_test_affinity.h"
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Shrink *mask in place to a proper subset (its lowest-numbered processor is
+// cleared) and set it as the calling thread's affinity mask.
+void set_subset_affinity(affinity_mask_t *mask) {
+  int cpu;
+  affinity_mask_t *original_mask = affinity_mask_alloc();
+  affinity_mask_copy(original_mask, mask); // scan a copy while editing *mask
+  // Find first processor to clear for subset mask
+  for (cpu = 0; cpu < AFFINITY_MAX_CPUS; ++cpu) { // ids are 0..MAX-1
+    if (affinity_mask_isset(original_mask, cpu)) {
+      affinity_mask_clr(mask, cpu);
+      break; // only one processor is removed
+    }
+  }
+  affinity_mask_free(original_mask);
+  set_thread_affinity(mask);
+}
+
+int main(int argc, char **argv) { // exit status: EXIT_SUCCESS iff child passed
+  char buf[1024] = {0};
+  char *other_buf;
+  size_t n;
+  int child_exit_status, exit_status;
+  affinity_mask_t *mask = affinity_mask_alloc();
+  get_thread_affinity(mask);
+  n = affinity_mask_snprintf(buf, sizeof(buf), mask);
+  printf("Original Mask: %s\n", buf);
+
+  if (affinity_mask_count(mask) == 1) { // no proper subset can be formed
+    printf("Only one processor in affinity mask, skipping test.\n");
+    exit(EXIT_SUCCESS);
+  }
+
+  #pragma omp parallel
+  {
+    #pragma omp single
+    printf("Hello! Thread %d executed single region in parent process\n",
+           omp_get_thread_num());
+  }
+
+  pid_t pid = fork(); // forked after the parent has run a parallel region
+  if (pid < 0) {
+    perror("fork()");
+    exit(EXIT_FAILURE);
+  }
+
+  if (pid == 0) {
+    // Let child set a new initial mask
+    set_subset_affinity(mask); // mask now holds the reduced (expected) mask
+    #pragma omp parallel
+    {
+      #pragma omp single
+      printf("Hello! Thread %d executed single region in child process\n",
+             omp_get_thread_num());
+    }
+    affinity_mask_t *new_mask = affinity_mask_alloc();
+    get_thread_affinity(new_mask); // mask in effect after the parallel region
+    if (!affinity_mask_equal(mask, new_mask)) {
+      affinity_mask_snprintf(buf, sizeof(buf), mask);
+      fprintf(stderr, "Original Mask = %s\n", buf);
+      affinity_mask_snprintf(buf, sizeof(buf), new_mask);
+      fprintf(stderr, "New Mask = %s\n", buf);
+      affinity_mask_free(new_mask);
+      fprintf(stderr, "Child affinity mask did not reset properly\n");
+      exit(EXIT_FAILURE);
+    }
+    affinity_mask_free(new_mask);
+    exit_status = EXIT_SUCCESS;
+  } else {
+    pid_t child_pid = pid;
+    pid = wait(&child_exit_status);
+    if (pid == -1) {
+      perror("wait()");
+      exit(EXIT_FAILURE);
+    }
+    if (WIFEXITED(child_exit_status)) { // propagate the child's verdict
+      exit_status = WEXITSTATUS(child_exit_status);
+    } else { // child did not terminate through exit()/return
+      exit_status = EXIT_FAILURE;
+    }
+  }
+
+  affinity_mask_free(mask);
+  return exit_status;
+}

From 7e075ad0b261236dd0a01f0b5e01f3221b0700d7 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet@google.com>
Date: Fri, 16 Apr 2021 21:34:49 +0000
Subject: [PATCH 03/52] [libc] Add endianness support

Add endianness detection support. This will be useful to implement `memcmp`.

Differential Revision: https://reviews.llvm.org/D100571
---
 libc/src/__support/CMakeLists.txt       |   1 +
 libc/src/__support/endian.h             | 142 ++++++++++++++++++++++++
 libc/test/src/CMakeLists.txt            |   1 +
 libc/test/src/__support/CMakeLists.txt  |  11 ++
 libc/test/src/__support/endian_test.cpp |  55 +++++++++
 5 files changed, 210 insertions(+)
 create mode 100644 libc/src/__support/endian.h
 create mode 100644 libc/test/src/__support/CMakeLists.txt
 create mode 100644 libc/test/src/__support/endian_test.cpp

diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index b20e8bc9a811..4206d742b9bc 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -2,6 +2,7 @@ add_header_library(
   common
   HDRS
     common.h
+    endian.h
     sanitizer.h
 )
 
diff --git a/libc/src/__support/endian.h b/libc/src/__support/endian.h
new file mode 100644
index 000000000000..e1d52ca468d3
--- /dev/null
+++ b/libc/src/__support/endian.h
@@ -0,0 +1,142 @@
+//===-- Endianness support ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_ENDIAN_H
+#define LLVM_LIBC_SRC_SUPPORT_ENDIAN_H
+
+#include <stdint.h>
+
+namespace __llvm_libc {
+
+// We rely on compiler preprocessor defines to allow for cross compilation.
+#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) ||           \
+    !defined(__ORDER_BIG_ENDIAN__)
+#error "Missing preprocessor definitions for endianness detection."
+#endif
+
+namespace internal {
+
+// Converts uint8_t, uint16_t, uint32_t, uint64_t to its big or little endian
+// counterpart.
+// We use explicit template specialization:
+// - to prevent accidental integer promotion.
+// - to prevent fallback in (unlikely) case of middle-endianness.
+
+template <unsigned ORDER> struct Endian { // ORDER: an __ORDER_*_ENDIAN__ value
+  static constexpr const bool isLittle = ORDER == __ORDER_LITTLE_ENDIAN__;
+  static constexpr const bool isBig = ORDER == __ORDER_BIG_ENDIAN__;
+  template <typename T> static T ToBigEndian(T value); // declared only; defined
+  template <typename T> static T ToLittleEndian(T value); // by specializations
+};
+
+// Little Endian specializations
+template <>
+template <>
+inline uint8_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToBigEndian<uint8_t>(uint8_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint8_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToLittleEndian<uint8_t>(uint8_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint16_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToBigEndian<uint16_t>(uint16_t v) {
+  return __builtin_bswap16(v);
+}
+template <>
+template <>
+inline uint16_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToLittleEndian<uint16_t>(uint16_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint32_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToBigEndian<uint32_t>(uint32_t v) {
+  return __builtin_bswap32(v);
+}
+template <>
+template <>
+inline uint32_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToLittleEndian<uint32_t>(uint32_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint64_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToBigEndian<uint64_t>(uint64_t v) {
+  return __builtin_bswap64(v);
+}
+template <>
+template <>
+inline uint64_t
+Endian<__ORDER_LITTLE_ENDIAN__>::ToLittleEndian<uint64_t>(uint64_t v) {
+  return v;
+}
+
+// Big Endian specializations
+template <>
+template <>
+inline uint8_t Endian<__ORDER_BIG_ENDIAN__>::ToBigEndian<uint8_t>(uint8_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint8_t
+Endian<__ORDER_BIG_ENDIAN__>::ToLittleEndian<uint8_t>(uint8_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint16_t
+Endian<__ORDER_BIG_ENDIAN__>::ToBigEndian<uint16_t>(uint16_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint16_t
+Endian<__ORDER_BIG_ENDIAN__>::ToLittleEndian<uint16_t>(uint16_t v) {
+  return __builtin_bswap16(v);
+}
+template <>
+template <>
+inline uint32_t
+Endian<__ORDER_BIG_ENDIAN__>::ToBigEndian<uint32_t>(uint32_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint32_t
+Endian<__ORDER_BIG_ENDIAN__>::ToLittleEndian<uint32_t>(uint32_t v) {
+  return __builtin_bswap32(v);
+}
+template <>
+template <>
+inline uint64_t
+Endian<__ORDER_BIG_ENDIAN__>::ToBigEndian<uint64_t>(uint64_t v) {
+  return v;
+}
+template <>
+template <>
+inline uint64_t
+Endian<__ORDER_BIG_ENDIAN__>::ToLittleEndian<uint64_t>(uint64_t v) {
+  return __builtin_bswap64(v);
+}
+
+} // namespace internal
+
+using Endian = internal::Endian<__BYTE_ORDER__>;
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_SUPPORT_ENDIAN_H
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
index 1d7c47070cb9..d4689d9a24dc 100644
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory(__support)
 add_subdirectory(ctype)
 add_subdirectory(errno)
 add_subdirectory(fenv)
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
new file mode 100644
index 000000000000..813e4137fb4c
--- /dev/null
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_libc_testsuite(libc_support_unittests)
+
+add_libc_unittest(
+  endian_test
+  SUITE
+    libc_support_unittests
+  SRCS
+    endian_test.cpp
+  DEPENDS
+    libc.src.__support.common
+)
diff --git a/libc/test/src/__support/endian_test.cpp b/libc/test/src/__support/endian_test.cpp
new file mode 100644
index 000000000000..89e2be2bf970
--- /dev/null
+++ b/libc/test/src/__support/endian_test.cpp
@@ -0,0 +1,55 @@
+//===-- Unittests for endian ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/endian.h"
+#include "utils/UnitTest/Test.h"
+
+namespace __llvm_libc {
+
+struct LlvmLibcEndian : testing::Test {
+  template <typename T> void check(const T original, const T swapped) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    EXPECT_EQ(Endian::ToLittleEndian(original), original);
+    EXPECT_EQ(Endian::ToBigEndian(original), swapped);
+#endif
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    EXPECT_EQ(Endian::ToBigEndian(original), original);
+    EXPECT_EQ(Endian::ToLittleEndian(original), swapped);
+#endif
+  }
+};
+
+TEST_F(LlvmLibcEndian, Field) {
+  EXPECT_EQ(Endian::isLittle, __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__);
+  EXPECT_EQ(Endian::isBig, __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__);
+}
+
+TEST_F(LlvmLibcEndian, uint8_t) {
+  const auto original = uint8_t(0x12);
+  check(original, original);
+}
+
+TEST_F(LlvmLibcEndian, uint16_t) {
+  const auto original = uint16_t(0x1234);
+  const auto swapped = __builtin_bswap16(original);
+  check(original, swapped);
+}
+
+TEST_F(LlvmLibcEndian, uint32_t) {
+  const auto original = uint32_t(0x12345678);
+  const auto swapped = __builtin_bswap32(original);
+  check(original, swapped);
+}
+
+TEST_F(LlvmLibcEndian, uint64_t) {
+  const auto original = uint64_t(0x123456789ABCDEF0);
+  const auto swapped = __builtin_bswap64(original);
+  check(original, swapped);
+}
+
+} // namespace __llvm_libc

From 4457565757ea91207b7e5f2ce7b7bf173bfd2c0c Mon Sep 17 00:00:00 2001
From: "Peyton, Jonathan L" <jonathan.l.peyton@intel.com>
Date: Mon, 15 Mar 2021 14:02:34 -0500
Subject: [PATCH 04/52] [OpenMP] Implement GOMP task reductions

Implement the remaining GOMP_* functions to support task reductions
in taskgroup, parallel, loop, and taskloop constructs.  The unused mem
argument to many of the work-sharing constructs has to do with the
scan() directive/ inscan() modifier.  If mem is set, each function
will call KMP_FATAL() and tell the user scan/inscan is unsupported.  The
GOMP reduction implementation is kept separate from our implementation
because of how GOMP presents reduction data and computes the reductions.
GOMP expects the privatized copies to be present even after a #pragma
omp parallel reduction(task:...) region has ended so the data is stored
inside GOMP's uintptr_t* data pseudo-structure.  This style is tightly
coupled with GCC compiler codegen.  There also aren't any init(),
combiner(), or fini() functions in GOMP's codegen, so the two
implementations were too disparate to try to wrap GOMP's around our own.

Differential Revision: https://reviews.llvm.org/D98806
---
 openmp/runtime/src/kmp.h                      |   1 +
 openmp/runtime/src/kmp_ftn_os.h               |  15 +
 openmp/runtime/src/kmp_gsupport.cpp           | 528 +++++++++++++++++-
 openmp/runtime/src/kmp_tasking.cpp            |   4 +-
 .../test/tasking/omp_task_red_taskloop.c      |   5 +-
 openmp/runtime/test/tasking/task_reduction1.c |  42 ++
 openmp/runtime/test/tasking/task_reduction2.c |  37 ++
 openmp/runtime/test/tasking/task_reduction3.c |  71 +++
 openmp/runtime/test/tasking/task_reduction4.c |  39 ++
 9 files changed, 735 insertions(+), 7 deletions(-)
 create mode 100644 openmp/runtime/test/tasking/task_reduction1.c
 create mode 100644 openmp/runtime/test/tasking/task_reduction2.c
 create mode 100644 openmp/runtime/test/tasking/task_reduction3.c
 create mode 100644 openmp/runtime/test/tasking/task_reduction4.c

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index c37e1d9feb57..5084640cae82 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2252,6 +2252,7 @@ typedef struct kmp_taskgroup {
   // Block of data to perform task reduction
   void *reduce_data; // reduction related info
   kmp_int32 reduce_num_data; // number of data items to reduce
+  uintptr_t *gomp_data; // gomp reduction data
 } kmp_taskgroup_t;
 
 // forward declarations
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index c122dce47d5d..5b9e396e3dd9 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -697,5 +697,20 @@
   GOMP_parallel_loop_maybe_nonmonotonic_runtime
 #define KMP_API_NAME_GOMP_TEAMS_REG GOMP_teams_reg
 #define KMP_API_NAME_GOMP_TASKWAIT_DEPEND GOMP_taskwait_depend
+#define KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER                         \
+  GOMP_taskgroup_reduction_register
+#define KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER                       \
+  GOMP_taskgroup_reduction_unregister
+#define KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP GOMP_task_reduction_remap
+#define KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS GOMP_parallel_reductions
+#define KMP_API_NAME_GOMP_LOOP_START GOMP_loop_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_START GOMP_loop_ull_start
+#define KMP_API_NAME_GOMP_LOOP_DOACROSS_START GOMP_loop_doacross_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START GOMP_loop_ull_doacross_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_START GOMP_loop_ordered_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START GOMP_loop_ull_ordered_start
+#define KMP_API_NAME_GOMP_SECTIONS2_START GOMP_sections2_start
+#define KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER                  \
+  GOMP_workshare_task_reduction_unregister
 
 #endif /* KMP_FTN_OS_H */
diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp
index e57641351a04..d4e0c5b18b1b 100644
--- a/openmp/runtime/src/kmp_gsupport.cpp
+++ b/openmp/runtime/src/kmp_gsupport.cpp
@@ -1688,6 +1688,9 @@ static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src,
   }
 }
 
+void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
+    uintptr_t *);
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
@@ -1707,6 +1710,7 @@ void __GOMP_taskloop(void (*func)(void *), void *data,
   int if_val = gomp_flags & (1u << 10);
   int nogroup = gomp_flags & (1u << 11);
   int up = gomp_flags & (1u << 8);
+  int reductions = gomp_flags & (1u << 12);
   p_task_dup_t task_dup = NULL;
   kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
 #ifdef KMP_DEBUG
@@ -1778,9 +1782,31 @@ void __GOMP_taskloop(void (*func)(void *), void *data,
   loop_bounds = (T *)task->shareds;
   loop_bounds[0] = start;
   loop_bounds[1] = end + (up ? -1 : 1);
+
+  if (!nogroup) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+    __kmpc_taskgroup(&loc, gtid);
+    if (reductions) {
+      // The data pointer points to lb, ub, then reduction data
+      struct data_t {
+        T a, b;
+        uintptr_t *d;
+      };
+      uintptr_t *d = ((data_t *)data)->d;
+      KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(d);
+    }
+  }
   __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
-                  (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, nogroup,
-                  sched, (kmp_uint64)num_tasks, (void *)task_dup);
+                  (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, 1, sched,
+                  (kmp_uint64)num_tasks, (void *)task_dup);
+  if (!nogroup) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+    __kmpc_end_taskgroup(&loc, gtid);
+  }
 }
 
 // 4 byte version of GOMP_doacross_post
@@ -1912,6 +1938,488 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT_DEPEND)(void **depend) {
   KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid));
 }
 
+static inline void
+__kmp_GOMP_taskgroup_reduction_register(uintptr_t *data, kmp_taskgroup_t *tg,
+                                        int nthreads,
+                                        uintptr_t *allocated = nullptr) {
+  KMP_ASSERT(data);
+  KMP_ASSERT(nthreads > 0);
+  // Fill in data[2] (start) and data[6] (end) of the privatized copies: reuse
+  // the block described by `allocated` when given, else allocate a new one
+  if (allocated) {
+    data[2] = allocated[2];
+    data[6] = allocated[6];
+  } else {
+    data[2] = (uintptr_t)__kmp_allocate(nthreads * data[1]); // data[1] per thread
+    data[6] = data[2] + (nthreads * data[1]); // one past the last copy
+  }
+  if (tg)
+    tg->gomp_data = data; // read back by GOMP_task_reduction_remap
+}
+
+void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
+    uintptr_t *data) {
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_taskgroup_reduction_register: T#%d\n", gtid));
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
+  int nthreads = thread->th.th_team_nproc;
+  __kmp_GOMP_taskgroup_reduction_register(data, tg, nthreads);
+}
+
+void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)(
+    uintptr_t *data) {
+  KA_TRACE(20,
+           ("GOMP_taskgroup_reduction_unregister: T#%d\n", __kmp_get_gtid()));
+  KMP_ASSERT(data && data[2]);
+  __kmp_free((void *)data[2]);
+}
+
+// For each of the cnt addresses in ptrs[], store the calling thread's private
+// copy in ptrs[i]; for i < cntorig also store the shared one in ptrs[cnt + i].
+void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP)(size_t cnt,
+                                                             size_t cntorig,
+                                                             void **ptrs) {
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_task_reduction_remap: T#%d\n", gtid));
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 tid = __kmp_get_tid(); // selects this thread's slice of the copies
+  for (size_t i = 0; i < cnt; ++i) {
+    uintptr_t address = (uintptr_t)ptrs[i];
+    void *propagated_address = NULL;
+    void *mapped_address = NULL;
+    // Check taskgroups reduce data
+    kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
+    while (tg) { // walk outward through the enclosing taskgroups
+      uintptr_t *gomp_data = tg->gomp_data;
+      if (!gomp_data) { // this taskgroup has no GOMP reduction data
+        tg = tg->parent;
+        continue;
+      }
+      // Check the shared addresses list
+      size_t num_vars = (size_t)gomp_data[0];
+      uintptr_t per_thread_size = gomp_data[1];
+      uintptr_t reduce_data = gomp_data[2];
+      uintptr_t end_reduce_data = gomp_data[6];
+      for (size_t j = 0; j < num_vars; ++j) {
+        uintptr_t *entry = gomp_data + 7 + 3 * j; // 3-word entries from [7]
+        if (entry[0] == address) { // ptrs[i] is the shared variable itself
+          uintptr_t offset = entry[1]; // its offset inside a thread's slice
+          mapped_address =
+              (void *)(reduce_data + tid * per_thread_size + offset);
+          if (i < cntorig)
+            propagated_address = (void *)entry[0];
+          break;
+        }
+      }
+      if (mapped_address)
+        break;
+      // Check if address is within privatized copies range
+      if (!mapped_address && address >= reduce_data &&
+          address < end_reduce_data) {
+        uintptr_t offset = (address - reduce_data) % per_thread_size;
+        mapped_address = (void *)(reduce_data + tid * per_thread_size + offset);
+        if (i < cntorig) { // recover the shared address from the offset
+          for (size_t j = 0; j < num_vars; ++j) {
+            uintptr_t *entry = gomp_data + 7 + 3 * j;
+            if (entry[1] == offset) {
+              propagated_address = (void *)entry[0];
+              break;
+            }
+          }
+        }
+      }
+      if (mapped_address)
+        break;
+      tg = tg->parent;
+    }
+    KMP_ASSERT(mapped_address); // every address must resolve in some taskgroup
+    ptrs[i] = mapped_address;
+    if (i < cntorig) {
+      KMP_ASSERT(propagated_address);
+      ptrs[cnt + i] = propagated_address;
+    }
+  }
+}
+
+static void __kmp_GOMP_init_reductions(int gtid, uintptr_t *data, int is_ws) {
+  kmp_info_t *thr = __kmp_threads[gtid];
+  kmp_team_t *team = thr->th.th_team;
+  // First start a taskgroup
+  __kmpc_taskgroup(NULL, gtid);
+  // Then set up reduction data; slot is_ws: 0 = parallel, 1 = worksharing
+  void *reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
+  if (reduce_data == NULL &&
+      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
+                                 (void *)1)) { // 1 marks "setup in progress"
+    // Single thread enters this block to initialize common reduction data
+    KMP_DEBUG_ASSERT(reduce_data == NULL);
+    __kmp_GOMP_taskgroup_reduction_register(data, NULL, thr->th.th_team_nproc);
+    KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[is_ws], 0);
+    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], (void *)data);
+  } else {
+    // Wait for task reduction initialization
+    while ((reduce_data = KMP_ATOMIC_LD_ACQ(
+                &team->t.t_tg_reduce_data[is_ws])) == (void *)1) {
+      KMP_CPU_PAUSE();
+    }
+    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here
+  }
+  // For worksharing constructs, each thread has its own reduction structure.
+  // Have each reduction structure point to same privatized copies of vars.
+  // For parallel, each thread points to same reduction structure and privatized
+  // copies of vars
+  if (is_ws) {
+    __kmp_GOMP_taskgroup_reduction_register(
+        data, NULL, thr->th.th_team_nproc,
+        (uintptr_t *)KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws]));
+  }
+  kmp_taskgroup_t *tg = thr->th.th_current_task->td_taskgroup;
+  tg->gomp_data = data; // attach to the taskgroup started above
+}
+
+static unsigned
+__kmp_GOMP_par_reductions_microtask_wrapper(int *gtid, int *npr,
+                                            void (*task)(void *), void *data) {
+  kmp_info_t *thr = __kmp_threads[*gtid];
+  kmp_team_t *team = thr->th.th_team;
+  uintptr_t *reduce_data = *(uintptr_t **)data; // first word of data
+  __kmp_GOMP_init_reductions(*gtid, reduce_data, 0); // 0: the "parallel" slot
+
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  ompt_state_t enclosing_state;
+
+  if (ompt_enabled.enabled) {
+    // save enclosing task state; set current state for task
+    enclosing_state = thr->th.ompt_thread_info.state;
+    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+
+    // set task frame
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+  }
+#endif
+
+  task(data); // run the outlined parallel region body
+
+#if OMPT_SUPPORT
+  if (ompt_enabled.enabled) {
+    // clear task frame
+    ompt_frame->exit_frame = ompt_data_none;
+
+    // restore enclosing state
+    thr->th.ompt_thread_info.state = enclosing_state;
+  }
+#endif
+  __kmpc_end_taskgroup(NULL, *gtid); // ends the one init_reductions started
+  // if last thread out, then reset the team's reduce data
+  // the GOMP_taskgroup_reduction_unregister() function will deallocate
+  // private copies after reduction calculations take place.
+  int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[0]);
+  if (count == thr->th.th_team_nproc - 1) { // presumably the pre-increment value
+    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[0], NULL);
+    KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[0], 0);
+  }
+  return (unsigned)thr->th.th_team_nproc;
+}
+
+unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS)(
+    void (*task)(void *), void *data, unsigned num_threads,
+    unsigned int flags) {
+  MKLOC(loc, "GOMP_parallel_reductions");
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_parallel_reductions: T#%d\n", gtid));
+  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
+                       (microtask_t)__kmp_GOMP_par_reductions_microtask_wrapper,
+                       2, task, data);
+  unsigned retval =
+      __kmp_GOMP_par_reductions_microtask_wrapper(&gtid, NULL, task, data);
+  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
+  KA_TRACE(20, ("GOMP_parallel_reductions exit: T#%d\n", gtid));
+  return retval;
+}
+
+bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_START)(
+    long start, long end, long incr, long sched, long chunk_size, long *istart,
+    long *iend, uintptr_t *reductions, void **mem) {
+  int status = 0;
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_loop_start: T#%d, reductions: %p\n", gtid, reductions));
+  if (reductions)
+    __kmp_GOMP_init_reductions(gtid, reductions, 1);
+  if (mem)
+    KMP_FATAL(GompFeatureNotSupported, "scan");
+  if (istart == NULL)
+    return true;
+  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
+  long monotonic = sched & MONOTONIC_FLAG;
+  sched &= ~MONOTONIC_FLAG;
+  if (sched == 0) {
+    if (monotonic)
+      status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START)(
+          start, end, incr, istart, iend);
+    else
+      status = KMP_EXPAND_NAME(
+          KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START)(
+          start, end, incr, istart, iend);
+  } else if (sched == 1) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START)(
+        start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 2) {
+    if (monotonic)
+      status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START)(
+          start, end, incr, chunk_size, istart, iend);
+    else
+      status =
+          KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START)(
+              start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 3) {
+    if (monotonic)
+      status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START)(
+          start, end, incr, chunk_size, istart, iend);
+    else
+      status =
+          KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START)(
+              start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 4) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START)(
+        start, end, incr, istart, iend);
+  } else {
+    KMP_ASSERT(0);
+  }
+  return status;
+}
+
+bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_START)(
+    bool up, unsigned long long start, unsigned long long end,
+    unsigned long long incr, long sched, unsigned long long chunk_size,
+    unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
+    void **mem) {
+  int status = 0;
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20,
+           ("GOMP_loop_ull_start: T#%d, reductions: %p\n", gtid, reductions));
+  if (reductions)
+    __kmp_GOMP_init_reductions(gtid, reductions, 1);
+  if (mem)
+    KMP_FATAL(GompFeatureNotSupported, "scan");
+  if (istart == NULL)
+    return true;
+  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
+  long monotonic = sched & MONOTONIC_FLAG;
+  sched &= ~MONOTONIC_FLAG;
+  if (sched == 0) {
+    if (monotonic)
+      status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START)(
+          up, start, end, incr, istart, iend);
+    else
+      status = KMP_EXPAND_NAME(
+          KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START)(
+          up, start, end, incr, istart, iend);
+  } else if (sched == 1) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START)(
+        up, start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 2) {
+    if (monotonic)
+      status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START)(
+          up, start, end, incr, chunk_size, istart, iend);
+    else
+      status = KMP_EXPAND_NAME(
+          KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START)(
+          up, start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 3) {
+    if (monotonic)
+      status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START)(
+          up, start, end, incr, chunk_size, istart, iend);
+    else
+      status =
+          KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START)(
+              up, start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 4) {
+    status =
+        KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START)(
+            up, start, end, incr, istart, iend);
+  } else {
+    KMP_ASSERT(0);
+  }
+  return status;
+}
+
+bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_START)(
+    unsigned ncounts, long *counts, long sched, long chunk_size, long *istart,
+    long *iend, uintptr_t *reductions, void **mem) {
+  int status = 0;
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_loop_doacross_start: T#%d, reductions: %p\n", gtid,
+                reductions));
+  if (reductions)
+    __kmp_GOMP_init_reductions(gtid, reductions, 1);
+  if (mem)
+    KMP_FATAL(GompFeatureNotSupported, "scan");
+  if (istart == NULL)
+    return true;
+  // Ignore any monotonic flag
+  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
+  sched &= ~MONOTONIC_FLAG;
+  if (sched == 0) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START)(
+        ncounts, counts, istart, iend);
+  } else if (sched == 1) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START)(
+        ncounts, counts, chunk_size, istart, iend);
+  } else if (sched == 2) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START)(
+        ncounts, counts, chunk_size, istart, iend);
+  } else if (sched == 3) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START)(
+        ncounts, counts, chunk_size, istart, iend);
+  } else {
+    KMP_ASSERT(0);
+  }
+  return status;
+}
+
+bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START)(
+    unsigned ncounts, unsigned long long *counts, long sched,
+    unsigned long long chunk_size, unsigned long long *istart,
+    unsigned long long *iend, uintptr_t *reductions, void **mem) {
+  int status = 0;
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_loop_ull_doacross_start: T#%d, reductions: %p\n", gtid,
+                reductions));
+  if (reductions)
+    __kmp_GOMP_init_reductions(gtid, reductions, 1);
+  if (mem)
+    KMP_FATAL(GompFeatureNotSupported, "scan");
+  if (istart == NULL)
+    return true;
+  // Ignore any monotonic flag
+  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
+  sched &= ~MONOTONIC_FLAG;
+  if (sched == 0) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START)(
+        ncounts, counts, istart, iend);
+  } else if (sched == 1) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START)(
+        ncounts, counts, chunk_size, istart, iend);
+  } else if (sched == 2) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START)(
+        ncounts, counts, chunk_size, istart, iend);
+  } else if (sched == 3) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START)(
+        ncounts, counts, chunk_size, istart, iend);
+  } else {
+    KMP_ASSERT(0);
+  }
+  return status;
+}
+
+bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_START)(
+    long start, long end, long incr, long sched, long chunk_size, long *istart,
+    long *iend, uintptr_t *reductions, void **mem) {
+  int status = 0;
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_loop_ordered_start: T#%d, reductions: %p\n", gtid,
+                reductions));
+  if (reductions)
+    __kmp_GOMP_init_reductions(gtid, reductions, 1);
+  if (mem)
+    KMP_FATAL(GompFeatureNotSupported, "scan");
+  if (istart == NULL)
+    return true;
+  // Ignore any monotonic flag
+  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
+  sched &= ~MONOTONIC_FLAG;
+  if (sched == 0) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START)(
+        start, end, incr, istart, iend);
+  } else if (sched == 1) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START)(
+        start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 2) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START)(
+        start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 3) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START)(
+        start, end, incr, chunk_size, istart, iend);
+  } else {
+    KMP_ASSERT(0);
+  }
+  return status;
+}
+
+bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START)(
+    bool up, unsigned long long start, unsigned long long end,
+    unsigned long long incr, long sched, unsigned long long chunk_size,
+    unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
+    void **mem) {
+  int status = 0;
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_loop_ull_ordered_start: T#%d, reductions: %p\n", gtid,
+                reductions));
+  if (reductions)
+    __kmp_GOMP_init_reductions(gtid, reductions, 1);
+  if (mem)
+    KMP_FATAL(GompFeatureNotSupported, "scan");
+  if (istart == NULL)
+    return true;
+  // Ignore any monotonic flag
+  const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
+  sched &= ~MONOTONIC_FLAG;
+  if (sched == 0) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START)(
+        up, start, end, incr, istart, iend);
+  } else if (sched == 1) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START)(
+        up, start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 2) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START)(
+        up, start, end, incr, chunk_size, istart, iend);
+  } else if (sched == 3) {
+    status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START)(
+        up, start, end, incr, chunk_size, istart, iend);
+  } else {
+    KMP_ASSERT(0);
+  }
+  return status;
+}
+
+unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS2_START)(
+    unsigned count, uintptr_t *reductions, void **mem) {
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20,
+           ("GOMP_sections2_start: T#%d, reductions: %p\n", gtid, reductions));
+  if (reductions)
+    __kmp_GOMP_init_reductions(gtid, reductions, 1);
+  if (mem)
+    KMP_FATAL(GompFeatureNotSupported, "scan");
+  return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(count);
+}
+
+void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER)(
+    bool cancelled) { // when false, the call finishes with a barrier
+  int gtid = __kmp_get_gtid();
+  MKLOC(loc, "GOMP_workshare_task_reduction_unregister");
+  KA_TRACE(20, ("GOMP_workshare_task_reduction_unregister: T#%d\n", gtid));
+  kmp_info_t *thr = __kmp_threads[gtid];
+  kmp_team_t *team = thr->th.th_team;
+  __kmpc_end_taskgroup(NULL, gtid);
+  // The last thread to leave the workshare frees the shared private copies
+  // (via GOMP_taskgroup_reduction_unregister) and then clears the team's
+  // worksharing slot [1] of t_tg_reduce_data and t_tg_fini_counter.
+  int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[1]);
+  if (count == thr->th.th_team_nproc - 1) { // presumably the pre-increment value
+    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)
+    ((uintptr_t *)KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[1]));
+    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[1], NULL);
+    KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[1], 0);
+  }
+  if (!cancelled) {
+    __kmpc_barrier(&loc, gtid);
+  }
+}
+
 /* The following sections of code create aliases for the GOMP_* functions, then
    create versioned symbols using the assembler directive .symver. This is only
    pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in
@@ -2085,7 +2593,21 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,
                    50, "GOMP_5.0");
 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0");
 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT_DEPEND, 50, "GOMP_5.0");
-
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER, 50,
+                   "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER, 50,
+                   "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_START, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_START, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_START, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_START, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS2_START, 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, 50,
+                   "GOMP_5.0");
 #endif // KMP_USE_VERSION_SYMBOLS
 
 #ifdef __cplusplus
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index d6409f7b7d47..8964decfb1ce 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -2497,6 +2497,7 @@ void __kmpc_taskgroup(ident_t *loc, int gtid) {
   tg_new->parent = taskdata->td_taskgroup;
   tg_new->reduce_data = NULL;
   tg_new->reduce_num_data = 0;
+  tg_new->gomp_data = NULL;
   taskdata->td_taskgroup = tg_new;
 
 #if OMPT_SUPPORT && OMPT_OPTIONAL
@@ -2595,7 +2596,8 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
   }
   KMP_DEBUG_ASSERT(taskgroup->count == 0);
 
-  if (taskgroup->reduce_data != NULL) { // need to reduce?
+  if (taskgroup->reduce_data != NULL &&
+      !taskgroup->gomp_data) { // need to reduce?
     int cnt;
     void *reduce_data;
     kmp_team_t *t = thread->th.th_team;
diff --git a/openmp/runtime/test/tasking/omp_task_red_taskloop.c b/openmp/runtime/test/tasking/omp_task_red_taskloop.c
index 6683ab682591..17130f4c5480 100644
--- a/openmp/runtime/test/tasking/omp_task_red_taskloop.c
+++ b/openmp/runtime/test/tasking/omp_task_red_taskloop.c
@@ -6,9 +6,8 @@
 // Parsing error until clang11:
 // UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
 
-// Missing GOMP_taskgroup_reduction_(un)register in LLVM/OpenMP
-// Should be removed once the functions are implemented
-// XFAIL: gcc-9, gcc-10
+// No icc compiler support yet
+// XFAIL: icc
 
 #include <stdio.h>
 #include <omp.h>
diff --git a/openmp/runtime/test/tasking/task_reduction1.c b/openmp/runtime/test/tasking/task_reduction1.c
new file mode 100644
index 000000000000..39712ea8f48c
--- /dev/null
+++ b/openmp/runtime/test/tasking/task_reduction1.c
@@ -0,0 +1,42 @@
+// RUN: %libomp-compile-and-run
+
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int a = 0, b = 1;
+
+int main(int argc, char **argv) {
+
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp taskgroup task_reduction(+: a) task_reduction(*: b)
+    {
+      int i;
+      for (i = 1; i <= 5; ++i) {
+        #pragma omp task in_reduction(+: a) in_reduction(*: b)
+        {
+          a += i;
+          b *= i;
+          #pragma omp task in_reduction(+: a)
+          {
+            a += i;
+          }
+        }
+      }
+    }
+  }
+
+  if (a != 30) {
+    fprintf(stderr, "error: a != 30. Instead a = %d\n", a);
+    exit(EXIT_FAILURE);
+  }
+  if (b != 120) {
+    fprintf(stderr, "error: b != 120. Instead b = %d\n", b);
+    exit(EXIT_FAILURE);
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/openmp/runtime/test/tasking/task_reduction2.c b/openmp/runtime/test/tasking/task_reduction2.c
new file mode 100644
index 000000000000..06a93b44dff1
--- /dev/null
+++ b/openmp/runtime/test/tasking/task_reduction2.c
@@ -0,0 +1,37 @@
+// RUN: %libomp-compile-and-run
+//
+// XFAIL: icc
+// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8, clang-9, clang-10
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int a = 0, b = 1;
+
+int main(int argc, char **argv) {
+
+  #pragma omp parallel
+  {
+    int i;
+    #pragma omp for reduction(task, +: a) reduction(task, *: b)
+    for (i = 1; i <= 5; ++i) {
+      #pragma omp task in_reduction(+: a) in_reduction(*: b)
+      {
+        a += i;
+        b *= i;
+      }
+    }
+  }
+
+  if (a != 15) {
+    fprintf(stderr, "error: a != 15. Instead a = %d\n", a);
+    exit(EXIT_FAILURE);
+  }
+  if (b != 120) {
+    fprintf(stderr, "error: b != 120. Instead b = %d\n", b);
+    exit(EXIT_FAILURE);
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/openmp/runtime/test/tasking/task_reduction3.c b/openmp/runtime/test/tasking/task_reduction3.c
new file mode 100644
index 000000000000..b125e3f6b385
--- /dev/null
+++ b/openmp/runtime/test/tasking/task_reduction3.c
@@ -0,0 +1,71 @@
+// RUN: %libomp-compile-and-run
+
+// XFAIL: icc
+// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8, clang-9, clang-10
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int a = 0, b = 1;
+
+int main(int argc, char **argv) {
+
+  #pragma omp parallel
+  {
+    #pragma omp sections reduction(task, +: a) reduction(task, *: b)
+    {
+      #pragma omp section
+      {
+        #pragma omp task in_reduction(+: a) in_reduction(*: b)
+        {
+          a += 1;
+          b *= 1;
+        }
+      }
+      #pragma omp section
+      {
+        #pragma omp task in_reduction(+: a) in_reduction(*: b)
+        {
+          a += 2;
+          b *= 2;
+        }
+      }
+      #pragma omp section
+      {
+        #pragma omp task in_reduction(+: a) in_reduction(*: b)
+        {
+          a += 3;
+          b *= 3;
+        }
+      }
+      #pragma omp section
+      {
+        #pragma omp task in_reduction(+: a) in_reduction(*: b)
+        {
+          a += 4;
+          b *= 4;
+        }
+      }
+      #pragma omp section
+      {
+        #pragma omp task in_reduction(+: a) in_reduction(*: b)
+        {
+          a += 5;
+          b *= 5;
+        }
+      }
+    }
+  }
+
+  if (a != 15) {
+    fprintf(stderr, "error: a != 15. Instead a = %d\n", a);
+    exit(EXIT_FAILURE);
+  }
+  if (b != 120) {
+    fprintf(stderr, "error: b != 120. Instead b = %d\n", b);
+    exit(EXIT_FAILURE);
+  }
+
+  return EXIT_SUCCESS;
+}
diff --git a/openmp/runtime/test/tasking/task_reduction4.c b/openmp/runtime/test/tasking/task_reduction4.c
new file mode 100644
index 000000000000..9b686ab9f4b9
--- /dev/null
+++ b/openmp/runtime/test/tasking/task_reduction4.c
@@ -0,0 +1,39 @@
+// RUN: %libomp-compile-and-run
+
+// XFAIL: icc
+// UNSUPPORTED: clang-4, clang-5, clang-6, clang-7, clang-8, clang-9, clang-10
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int a = 0, b = 1;
+
+int main(int argc, char **argv) {
+
+  #pragma omp parallel reduction(task, +:a) reduction(task, *:b)
+  {
+    #pragma omp single
+    {
+      int i;
+      for (i = 1; i <= 5; ++i) {
+        #pragma omp task in_reduction(+: a) in_reduction(*: b)
+        {
+          a += i;
+          b *= i;
+        }
+      }
+    }
+  }
+
+  if (a != 15) {
+    fprintf(stderr, "error: a != 15. Instead a = %d\n", a);
+    exit(EXIT_FAILURE);
+  }
+  if (b != 120) {
+    fprintf(stderr, "error: b != 120. Instead b = %d\n", b);
+    exit(EXIT_FAILURE);
+  }
+
+  return EXIT_SUCCESS;
+}

From 2bfe15810defad3e0615f79e0bda375fc8f02f23 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet@google.com>
Date: Fri, 16 Apr 2021 21:58:27 +0000
Subject: [PATCH 05/52] [libc] Fix wrongly deduced type

---
 libc/test/src/__support/endian_test.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libc/test/src/__support/endian_test.cpp b/libc/test/src/__support/endian_test.cpp
index 89e2be2bf970..e06355759e0f 100644
--- a/libc/test/src/__support/endian_test.cpp
+++ b/libc/test/src/__support/endian_test.cpp
@@ -30,25 +30,25 @@ TEST_F(LlvmLibcEndian, Field) {
 }
 
 TEST_F(LlvmLibcEndian, uint8_t) {
-  const auto original = uint8_t(0x12);
+  const uint8_t original = uint8_t(0x12);
   check(original, original);
 }
 
 TEST_F(LlvmLibcEndian, uint16_t) {
-  const auto original = uint16_t(0x1234);
-  const auto swapped = __builtin_bswap16(original);
+  const uint16_t original = uint16_t(0x1234);
+  const uint16_t swapped = __builtin_bswap16(original);
   check(original, swapped);
 }
 
 TEST_F(LlvmLibcEndian, uint32_t) {
-  const auto original = uint32_t(0x12345678);
-  const auto swapped = __builtin_bswap32(original);
+  const uint32_t original = uint32_t(0x12345678);
+  const uint32_t swapped = __builtin_bswap32(original);
   check(original, swapped);
 }
 
 TEST_F(LlvmLibcEndian, uint64_t) {
-  const auto original = uint64_t(0x123456789ABCDEF0);
-  const auto swapped = __builtin_bswap64(original);
+  const uint64_t original = uint64_t(0x123456789ABCDEF0);
+  const uint64_t swapped = __builtin_bswap64(original);
   check(original, swapped);
 }
 

From 48cc5b0d35fe5d5bd7a9427dd6af2c0d59bd5ce2 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Fri, 16 Apr 2021 18:03:44 -0400
Subject: [PATCH 06/52] [gn build] (manually) port ca6751043d88

---
 llvm/utils/gn/secondary/lld/test/BUILD.gn      | 7 +++++++
 llvm/utils/gn/secondary/lld/tools/lld/BUILD.gn | 1 +
 2 files changed, 8 insertions(+)

diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn
index 9344f7e4c04f..845d425b2d5f 100644
--- a/llvm/utils/gn/secondary/lld/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn
@@ -1,5 +1,6 @@
 import("//llvm/lib/DebugInfo/PDB/enable_dia.gni")
 import("//llvm/triples.gni")
+import("//llvm/utils/gn/build/libs/xar/enable.gni")
 import("//llvm/utils/gn/build/libs/xml/enable.gni")
 import("//llvm/utils/gn/build/libs/zlib/enable.gni")
 import("//llvm/utils/gn/build/write_cmake_config.gni")
@@ -51,6 +52,12 @@ write_lit_cfg("lit_site_cfg") {
     extra_values += [ "LLVM_ENABLE_DIA_SDK=0" ]  # Must be 0.
   }
 
+  if (llvm_enable_libxar) {
+    extra_values += [ "HAVE_LIBXAR=1" ]
+  } else {
+    extra_values += [ "HAVE_LIBXAR=" ]
+  }
+
   if (llvm_enable_libxml2) {
     extra_values += [ "LLVM_ENABLE_LIBXML2=1" ]
   } else {
diff --git a/llvm/utils/gn/secondary/lld/tools/lld/BUILD.gn b/llvm/utils/gn/secondary/lld/tools/lld/BUILD.gn
index ca87ca8a5c3b..f5934086fe0c 100644
--- a/llvm/utils/gn/secondary/lld/tools/lld/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/tools/lld/BUILD.gn
@@ -34,6 +34,7 @@ executable("lld") {
     "//lld/lib/Driver",
     "//lld/wasm",
     "//llvm/lib/Support",
+    "//llvm/utils/gn/build/libs/xar",
   ]
   sources = [ "lld.cpp" ]
 }

From be0ffbb21f6631a73679d371da5315db3eb3537c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 16 Apr 2021 14:59:40 -0700
Subject: [PATCH 07/52] [TableGen] Run GenerateVariants before
 ExpandHwModeBasedTypes.

A large portion of the patterns are duplicated for HwMode on RISCV.
If we expand HwMode first, we need to check nearly twice as many
patterns for variants. HwModes shouldn't affect whether a variant
is valid so we should be able to expand after.

This also reduces the RISCV isel table by 539 bytes due to factoring
working better on this pattern order. Unfortunately it increases
Hexagon table size by ~50 bytes. But I think this is a reasonable
trade.
---
 llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index dff346477a05..43486880933c 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -3086,15 +3086,15 @@ CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R,
   ParsePatternFragments(/*OutFrags*/true);
   ParsePatterns();
 
+  // Generate variants.  For example, commutative patterns can match
+  // multiple ways.  Add them to PatternsToMatch as well.
+  GenerateVariants();
+
   // Break patterns with parameterized types into a series of patterns,
   // where each one has a fixed type and is predicated on the conditions
   // of the associated HW mode.
   ExpandHwModeBasedTypes();
 
-  // Generate variants.  For example, commutative patterns can match
-  // multiple ways.  Add them to PatternsToMatch as well.
-  GenerateVariants();
-
   // Infer instruction flags.  For example, we can detect loads,
   // stores, and side effects in many cases by examining an
   // instruction's pattern.

From bc636c1c2c8aafeac5ce3aba0b268fdcb1914864 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Fri, 16 Apr 2021 18:16:14 -0400
Subject: [PATCH 08/52] [gn build] (manually) port ca6751043d88 better

---
 llvm/utils/gn/secondary/lld/test/BUILD.gn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn
index 845d425b2d5f..5268f2e544b6 100644
--- a/llvm/utils/gn/secondary/lld/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn
@@ -55,7 +55,7 @@ write_lit_cfg("lit_site_cfg") {
   if (llvm_enable_libxar) {
     extra_values += [ "HAVE_LIBXAR=1" ]
   } else {
-    extra_values += [ "HAVE_LIBXAR=" ]
+    extra_values += [ "HAVE_LIBXAR=0" ]  # Must be 0.
   }
 
   if (llvm_enable_libxml2) {

From 11707435ccb44a9377bfed407453e0646a159636 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Fri, 16 Apr 2021 15:28:15 -0700
Subject: [PATCH 09/52] [inferattrs] Don't infer lib func attributes for
 nobuiltin functions

If we have a nobuiltin function, we can't assume we know anything about the implementation.

I noticed this when tracing through a log from an in the wild miscompile (https://github.com/emscripten-core/emscripten/issues/9443) triggered after 8666463.  We were incorrectly assuming that a custom allocator could not free.  (It's not clear yet this is the only problem in said issue.)

I also noticed something similar mentioned in the commit message of ab243e when scrolling back through history.  Though, from what I can tell, that commit fixed the symptom, not the root cause.

The interface we have for library function detection is extremely error prone, but given the interaction between ``nobuiltin`` decls and ``builtin`` callsites, it's really hard to imagine something much cleaner.  I may iterate on that, but it'll be invasive enough I didn't want to hold an obvious functional fix on it.
---
 llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp       | 3 ++-
 llvm/test/Transforms/InferFunctionAttrs/nobuiltin.ll | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/InferFunctionAttrs/nobuiltin.ll

diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 30402f109f30..c32e09875a12 100644
--- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -31,7 +31,8 @@ static bool inferAllPrototypeAttributes(
     // attribute logic on all calls to declarations (as declarations aren't
     // explicitly visited by CGSCC passes in the new pass manager.)
     if (F.isDeclaration() && !F.hasOptNone()) {
-      Changed |= inferLibFuncAttributes(F, GetTLI(F));
+      if (!F.hasFnAttribute(Attribute::NoBuiltin))
+        Changed |= inferLibFuncAttributes(F, GetTLI(F));
       Changed |= inferAttributesFromOthers(F);
     }
 
diff --git a/llvm/test/Transforms/InferFunctionAttrs/nobuiltin.ll b/llvm/test/Transforms/InferFunctionAttrs/nobuiltin.ll
new file mode 100644
index 000000000000..1239a22a3a05
--- /dev/null
+++ b/llvm/test/Transforms/InferFunctionAttrs/nobuiltin.ll
@@ -0,0 +1,5 @@
+; RUN: opt -S -inferattrs < %s | FileCheck %s
+
+; CHECK: Function Attrs: nobuiltin allocsize(0)
+; CHECK: declare i8* @_Znwm(i32)
+declare i8* @_Znwm(i32) nobuiltin allocsize(0)

From e9fe788d326090cb6155c0dec90b44c932273dd3 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jason@molenda.com>
Date: Fri, 16 Apr 2021 16:10:16 -0700
Subject: [PATCH 10/52] Target::ReadMemory read from read-only binary file
 Section, not memory

Committing this patch on behalf of Augusto Noronha, who is still
getting set up.

This patch changes Target::ReadMemory so the default behavior
when a read is in a Section that is read-only is to fetch the
data from the local binary image, instead of reading it from
memory.  Update all callers to use their old preferences
(the old prefer_file_cache bool) using the new API; we should
revisit these calls and see if they really intend to read
live memory, or if reading from a read-only Section would be
equivalent and important for performance-sensitive cases.

rdar://30634422

Differential revision: https://reviews.llvm.org/D100338
---
 lldb/include/lldb/Core/Disassembler.h         | 13 ++--
 lldb/include/lldb/Symbol/Function.h           |  4 +-
 lldb/include/lldb/Target/Target.h             | 26 ++++----
 lldb/source/API/SBFunction.cpp                |  4 +-
 lldb/source/API/SBSymbol.cpp                  |  4 +-
 lldb/source/API/SBTarget.cpp                  |  8 +--
 lldb/source/Commands/CommandObjectMemory.cpp  |  4 +-
 lldb/source/Core/Address.cpp                  |  6 +-
 lldb/source/Core/Disassembler.cpp             | 14 ++---
 lldb/source/Core/IOHandlerCursesGUI.cpp       |  2 +-
 lldb/source/Core/Value.cpp                    | 13 ++--
 lldb/source/Core/ValueObject.cpp              |  2 +-
 lldb/source/Expression/IRMemoryMap.cpp        |  2 +-
 .../Architecture/Mips/ArchitectureMips.cpp    |  4 +-
 .../DynamicLoaderDarwinKernel.cpp             | 13 ++--
 .../MacOSX-DYLD/DynamicLoaderDarwin.cpp       |  2 +-
 .../Windows-DYLD/DynamicLoaderWindowsDYLD.cpp |  2 +-
 .../MIPS/EmulateInstructionMIPS.cpp           |  5 +-
 .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp  |  5 +-
 .../UnwindAssemblyInstEmulation.cpp           |  6 +-
 .../UnwindAssembly/x86/UnwindAssembly-x86.cpp | 21 +++----
 lldb/source/Symbol/Function.cpp               |  7 +--
 lldb/source/Symbol/Symbol.cpp                 |  3 +-
 lldb/source/Target/Process.cpp                |  4 +-
 lldb/source/Target/StackFrame.cpp             |  8 +--
 lldb/source/Target/Target.cpp                 | 62 ++++++++++---------
 lldb/source/Target/ThreadPlanStepRange.cpp    |  3 +-
 lldb/source/Target/Trace.cpp                  |  8 +--
 28 files changed, 122 insertions(+), 133 deletions(-)

diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h
index 9a694de0f60a..0a76f0a12b9d 100644
--- a/lldb/include/lldb/Core/Disassembler.h
+++ b/lldb/include/lldb/Core/Disassembler.h
@@ -394,10 +394,12 @@ class Disassembler : public std::enable_shared_from_this<Disassembler>,
     lldb::addr_t value;
   };
 
-  static lldb::DisassemblerSP
-  DisassembleRange(const ArchSpec &arch, const char *plugin_name,
-                   const char *flavor, Target &target,
-                   const AddressRange &disasm_range, bool prefer_file_cache);
+  static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch,
+                                               const char *plugin_name,
+                                               const char *flavor,
+                                               Target &target,
+                                               const AddressRange &disasm_range,
+                                               bool force_live_memory = false);
 
   static lldb::DisassemblerSP
   DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
@@ -426,7 +428,8 @@ class Disassembler : public std::enable_shared_from_this<Disassembler>,
                          Stream &strm);
 
   size_t ParseInstructions(Target &target, Address address, Limit limit,
-                           Stream *error_strm_ptr, bool prefer_file_cache);
+                           Stream *error_strm_ptr,
+                           bool force_live_memory = false);
 
   virtual size_t DecodeInstructions(const Address &base_addr,
                                     const DataExtractor &data,
diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h
index 300d829219d4..aae5b4a496c2 100644
--- a/lldb/include/lldb/Symbol/Function.h
+++ b/lldb/include/lldb/Symbol/Function.h
@@ -631,10 +631,10 @@ class Function : public UserID, public SymbolContextScope {
 
   lldb::DisassemblerSP GetInstructions(const ExecutionContext &exe_ctx,
                                        const char *flavor,
-                                       bool prefer_file_cache);
+                                       bool force_live_memory = false);
 
   bool GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor,
-                      bool prefer_file_cache, Stream &strm);
+                      Stream &strm, bool force_live_memory = false);
 
 protected:
   enum {
diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h
index 0c2131a60b4b..219213312f64 100644
--- a/lldb/include/lldb/Target/Target.h
+++ b/lldb/include/lldb/Target/Target.h
@@ -1005,11 +1005,12 @@ class Target : public std::enable_shared_from_this<Target>,
   // read from const sections in object files, read from the target. This
   // version of ReadMemory will try and read memory from the process if the
   // process is alive. The order is:
-  // 1 - if (prefer_file_cache == true) then read from object file cache
-  // 2 - if there is a valid process, try and read from its memory
-  // 3 - if (prefer_file_cache == false) then read from object file cache
-  size_t ReadMemory(const Address &addr, bool prefer_file_cache, void *dst,
-                    size_t dst_len, Status &error,
+  // 1 - if (force_live_memory == false) and the address falls in a read-only
+  // section, then read from the file cache
+  // 2 - if there is a process, then read from memory
+  // 3 - if there is no process, then read from the file cache
+  size_t ReadMemory(const Address &addr, void *dst, size_t dst_len,
+                    Status &error, bool force_live_memory = false,
                     lldb::addr_t *load_addr_ptr = nullptr);
 
   size_t ReadCStringFromMemory(const Address &addr, std::string &out_str,
@@ -1018,18 +1019,19 @@ class Target : public std::enable_shared_from_this<Target>,
   size_t ReadCStringFromMemory(const Address &addr, char *dst,
                                size_t dst_max_len, Status &result_error);
 
-  size_t ReadScalarIntegerFromMemory(const Address &addr,
-                                     bool prefer_file_cache, uint32_t byte_size,
+  size_t ReadScalarIntegerFromMemory(const Address &addr, uint32_t byte_size,
                                      bool is_signed, Scalar &scalar,
-                                     Status &error);
+                                     Status &error,
+                                     bool force_live_memory = false);
 
   uint64_t ReadUnsignedIntegerFromMemory(const Address &addr,
-                                         bool prefer_file_cache,
                                          size_t integer_byte_size,
-                                         uint64_t fail_value, Status &error);
+                                         uint64_t fail_value, Status &error,
+                                         bool force_live_memory = false);
 
-  bool ReadPointerFromMemory(const Address &addr, bool prefer_file_cache,
-                             Status &error, Address &pointer_addr);
+  bool ReadPointerFromMemory(const Address &addr, Status &error,
+                             Address &pointer_addr,
+                             bool force_live_memory = false);
 
   SectionLoadList &GetSectionLoadList() {
     return m_section_load_history.GetCurrentSectionLoadList();
diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp
index 9f3cf817fc8c..7d8171634752 100644
--- a/lldb/source/API/SBFunction.cpp
+++ b/lldb/source/API/SBFunction.cpp
@@ -132,10 +132,10 @@ SBInstructionList SBFunction::GetInstructions(SBTarget target,
         m_opaque_ptr->GetAddressRange().GetBaseAddress().GetModule());
     if (target_sp && module_sp) {
       lock = std::unique_lock<std::recursive_mutex>(target_sp->GetAPIMutex());
-      const bool prefer_file_cache = false;
+      const bool force_live_memory = true;
       sb_instructions.SetDisassembler(Disassembler::DisassembleRange(
           module_sp->GetArchitecture(), nullptr, flavor, *target_sp,
-          m_opaque_ptr->GetAddressRange(), prefer_file_cache));
+          m_opaque_ptr->GetAddressRange(), force_live_memory));
     }
   }
   return LLDB_RECORD_RESULT(sb_instructions);
diff --git a/lldb/source/API/SBSymbol.cpp b/lldb/source/API/SBSymbol.cpp
index eafc3e630bcd..7b9c90032e1a 100644
--- a/lldb/source/API/SBSymbol.cpp
+++ b/lldb/source/API/SBSymbol.cpp
@@ -132,10 +132,10 @@ SBInstructionList SBSymbol::GetInstructions(SBTarget target,
       ModuleSP module_sp = symbol_addr.GetModule();
       if (module_sp) {
         AddressRange symbol_range(symbol_addr, m_opaque_ptr->GetByteSize());
-        const bool prefer_file_cache = false;
+        const bool force_live_memory = true;
         sb_instructions.SetDisassembler(Disassembler::DisassembleRange(
             module_sp->GetArchitecture(), nullptr, flavor_string, *target_sp,
-            symbol_range, prefer_file_cache));
+            symbol_range, force_live_memory));
       }
     }
   }
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index ad1ef6910701..b0b12dc73d88 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -705,7 +705,7 @@ size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size,
   if (target_sp) {
     std::lock_guard<std::recursive_mutex> guard(target_sp->GetAPIMutex());
     bytes_read =
-        target_sp->ReadMemory(addr.ref(), false, buf, size, sb_error.ref());
+        target_sp->ReadMemory(addr.ref(), buf, size, sb_error.ref(), true);
   } else {
     sb_error.SetErrorString("invalid target");
   }
@@ -2085,12 +2085,12 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
     if (addr_ptr) {
       DataBufferHeap data(
           target_sp->GetArchitecture().GetMaximumOpcodeByteSize() * count, 0);
-      bool prefer_file_cache = false;
+      bool force_live_memory = true;
       lldb_private::Status error;
       lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
       const size_t bytes_read =
-          target_sp->ReadMemory(*addr_ptr, prefer_file_cache, data.GetBytes(),
-                                data.GetByteSize(), error, &load_addr);
+          target_sp->ReadMemory(*addr_ptr, data.GetBytes(), data.GetByteSize(),
+                                error, force_live_memory, &load_addr);
       const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
       sb_instructions.SetDisassembler(Disassembler::DisassembleBytes(
           target_sp->GetArchitecture(), nullptr, flavor_string, *addr_ptr,
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index a2201c4d8bd9..19c13c089503 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -669,8 +669,8 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
       }
 
       Address address(addr, nullptr);
-      bytes_read = target->ReadMemory(address, false, data_sp->GetBytes(),
-                                      data_sp->GetByteSize(), error);
+      bytes_read = target->ReadMemory(address, data_sp->GetBytes(),
+                                      data_sp->GetByteSize(), error, true);
       if (bytes_read == 0) {
         const char *error_cstr = error.AsCString();
         if (error_cstr && error_cstr[0]) {
diff --git a/lldb/source/Core/Address.cpp b/lldb/source/Core/Address.cpp
index 9d52f1db8918..24bb8417b69b 100644
--- a/lldb/source/Core/Address.cpp
+++ b/lldb/source/Core/Address.cpp
@@ -65,9 +65,9 @@ static size_t ReadBytes(ExecutionContextScope *exe_scope,
   TargetSP target_sp(exe_scope->CalculateTarget());
   if (target_sp) {
     Status error;
-    bool prefer_file_cache = false;
-    return target_sp->ReadMemory(address, prefer_file_cache, dst, dst_len,
-                                 error);
+    bool force_live_memory = true;
+    return target_sp->ReadMemory(address, dst, dst_len, error,
+                                 force_live_memory);
   }
   return 0;
 }
diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp
index 3a975d9296f4..60c41e1d00ed 100644
--- a/lldb/source/Core/Disassembler.cpp
+++ b/lldb/source/Core/Disassembler.cpp
@@ -122,7 +122,7 @@ static Address ResolveAddress(Target &target, const Address &addr) {
 
 lldb::DisassemblerSP Disassembler::DisassembleRange(
     const ArchSpec &arch, const char *plugin_name, const char *flavor,
-    Target &target, const AddressRange &range, bool prefer_file_cache) {
+    Target &target, const AddressRange &range, bool force_live_memory) {
   if (range.GetByteSize() <= 0)
     return {};
 
@@ -137,7 +137,7 @@ lldb::DisassemblerSP Disassembler::DisassembleRange(
 
   const size_t bytes_disassembled = disasm_sp->ParseInstructions(
       target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()},
-      nullptr, prefer_file_cache);
+      nullptr, force_live_memory);
   if (bytes_disassembled == 0)
     return {};
 
@@ -181,9 +181,9 @@ bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
   if (!disasm_sp)
     return false;
 
-  const bool prefer_file_cache = false;
+  const bool force_live_memory = true;
   size_t bytes_disassembled = disasm_sp->ParseInstructions(
-      exe_ctx.GetTargetRef(), address, limit, &strm, prefer_file_cache);
+      exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory);
   if (bytes_disassembled == 0)
     return false;
 
@@ -1036,7 +1036,7 @@ InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
 
 size_t Disassembler::ParseInstructions(Target &target, Address start,
                                        Limit limit, Stream *error_strm_ptr,
-                                       bool prefer_file_cache) {
+                                       bool force_live_memory) {
   m_instruction_list.Clear();
 
   if (!start.IsValid())
@@ -1052,8 +1052,8 @@ size_t Disassembler::ParseInstructions(Target &target, Address start,
   Status error;
   lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
   const size_t bytes_read =
-      target.ReadMemory(start, prefer_file_cache, data_sp->GetBytes(),
-                        data_sp->GetByteSize(), error, &load_addr);
+      target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(),
+                        error, force_live_memory, &load_addr);
   const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
 
   if (bytes_read == 0) {
diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp
index a1c9daa79be7..1de83d896bf3 100644
--- a/lldb/source/Core/IOHandlerCursesGUI.cpp
+++ b/lldb/source/Core/IOHandlerCursesGUI.cpp
@@ -3550,7 +3550,7 @@ class SourceFileWindowDelegate : public WindowDelegate {
             if (m_disassembly_scope != m_sc.function) {
               m_disassembly_scope = m_sc.function;
               m_disassembly_sp = m_sc.function->GetInstructions(
-                  exe_ctx, nullptr, prefer_file_cache);
+                  exe_ctx, nullptr, !prefer_file_cache);
               if (m_disassembly_sp) {
                 set_selected_line_to_pc = true;
                 m_disassembly_range = m_sc.function->GetAddressRange();
diff --git a/lldb/source/Core/Value.cpp b/lldb/source/Core/Value.cpp
index 7b52ff9e4207..b5af03c0d001 100644
--- a/lldb/source/Core/Value.cpp
+++ b/lldb/source/Core/Value.cpp
@@ -525,15 +525,10 @@ Status Value::GetValueAsData(ExecutionContext *exe_ctx, DataExtractor &data,
     } else if ((address_type == eAddressTypeLoad) ||
                (address_type == eAddressTypeFile)) {
       if (file_so_addr.IsValid()) {
-        // We have a file address that we were able to translate into a section
-        // offset address so we might be able to read this from the object
-        // files if we don't have a live process. Lets always try and read from
-        // the process if we have one though since we want to read the actual
-        // value by setting "prefer_file_cache" to false.
-        const bool prefer_file_cache = false;
-        if (exe_ctx->GetTargetRef().ReadMemory(file_so_addr, prefer_file_cache,
-                                               dst, byte_size,
-                                               error) != byte_size) {
+        const bool force_live_memory = true;
+        if (exe_ctx->GetTargetRef().ReadMemory(file_so_addr, dst, byte_size,
+                                               error, force_live_memory) !=
+            byte_size) {
           error.SetErrorStringWithFormat(
               "read memory from 0x%" PRIx64 " failed", (uint64_t)address);
         }
diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp
index 1988155a16ac..55fc5ff64172 100644
--- a/lldb/source/Core/ValueObject.cpp
+++ b/lldb/source/Core/ValueObject.cpp
@@ -721,7 +721,7 @@ size_t ValueObject::GetPointeeData(DataExtractor &data, uint32_t item_idx,
         if (target) {
           heap_buf_ptr->SetByteSize(bytes);
           size_t bytes_read = target->ReadMemory(
-              so_addr, false, heap_buf_ptr->GetBytes(), bytes, error);
+              so_addr, heap_buf_ptr->GetBytes(), bytes, error, true);
           if (error.Success()) {
             data.SetData(data_sp);
             return bytes_read;
diff --git a/lldb/source/Expression/IRMemoryMap.cpp b/lldb/source/Expression/IRMemoryMap.cpp
index 6b1e4c313a39..4ae2724d4dd8 100644
--- a/lldb/source/Expression/IRMemoryMap.cpp
+++ b/lldb/source/Expression/IRMemoryMap.cpp
@@ -639,7 +639,7 @@ void IRMemoryMap::ReadMemory(uint8_t *bytes, lldb::addr_t process_address,
 
     if (target_sp) {
       Address absolute_address(process_address);
-      target_sp->ReadMemory(absolute_address, false, bytes, size, error);
+      target_sp->ReadMemory(absolute_address, bytes, size, error, true);
       return;
     }
 
diff --git a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp
index 22508969ceed..757c91570009 100644
--- a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp
+++ b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp
@@ -160,7 +160,6 @@ Instruction *ArchitectureMips::GetInstructionAtAddress(
 
   InstructionList instruction_list;
   InstructionSP prev_insn;
-  bool prefer_file_cache = true; // Read from file
   uint32_t inst_to_choose = 0;
 
   Address addr = resolved_addr;
@@ -171,8 +170,7 @@ Instruction *ArchitectureMips::GetInstructionAtAddress(
     uint32_t insn_size = 0;
 
     disasm_sp->ParseInstructions(target, addr,
-                                 {Disassembler::Limit::Bytes, i * 2}, nullptr,
-                                 prefer_file_cache);
+                                 {Disassembler::Limit::Bytes, i * 2}, nullptr);
 
     uint32_t num_insns = disasm_sp->GetInstructionList().GetSize();
     if (num_insns) {
diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
index fd1916d296d5..575a882697e0 100644
--- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
@@ -1093,16 +1093,16 @@ bool DynamicLoaderDarwinKernel::ReadKextSummaryHeader() {
     uint8_t buf[24];
     DataExtractor data(buf, sizeof(buf), byte_order, addr_size);
     const size_t count = 4 * sizeof(uint32_t) + addr_size;
-    const bool prefer_file_cache = false;
+    const bool force_live_memory = true;
     if (m_process->GetTarget().ReadPointerFromMemory(
-            m_kext_summary_header_ptr_addr, prefer_file_cache, error,
-            m_kext_summary_header_addr)) {
+            m_kext_summary_header_ptr_addr, error,
+            m_kext_summary_header_addr, force_live_memory)) {
       // We got a valid address for our kext summary header and make sure it
       // isn't NULL
       if (m_kext_summary_header_addr.IsValid() &&
           m_kext_summary_header_addr.GetFileAddress() != 0) {
         const size_t bytes_read = m_process->GetTarget().ReadMemory(
-            m_kext_summary_header_addr, prefer_file_cache, buf, count, error);
+            m_kext_summary_header_addr, buf, count, error, force_live_memory);
         if (bytes_read == count) {
           lldb::offset_t offset = 0;
           m_kext_summary_header.version = data.GetU32(&offset);
@@ -1373,10 +1373,9 @@ uint32_t DynamicLoaderDarwinKernel::ReadKextSummaries(
   DataBufferHeap data(count, 0);
   Status error;
 
-  const bool prefer_file_cache = false;
+  const bool force_live_memory = true;
   const size_t bytes_read = m_process->GetTarget().ReadMemory(
-      kext_summary_addr, prefer_file_cache, data.GetBytes(), data.GetByteSize(),
-      error);
+      kext_summary_addr, data.GetBytes(), data.GetByteSize(), error, force_live_memory);
   if (bytes_read == count) {
 
     DataExtractor extractor(data.GetBytes(), data.GetByteSize(), endian,
diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
index ddf6f1212a3e..ac4311260600 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
@@ -1086,7 +1086,7 @@ DynamicLoaderDarwin::GetThreadLocalData(const lldb::ModuleSP module_sp,
     Status error;
     const size_t tsl_data_size = addr_size * 3;
     Target &target = m_process->GetTarget();
-    if (target.ReadMemory(tls_addr, false, buf, tsl_data_size, error) ==
+    if (target.ReadMemory(tls_addr, buf, tsl_data_size, error, true) ==
         tsl_data_size) {
       const ByteOrder byte_order = m_process->GetByteOrder();
       DataExtractor data(buf, sizeof(buf), byte_order, addr_size);
diff --git a/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp b/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp
index 7f9504b9b3a9..f0a01ed4ba6e 100644
--- a/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp
@@ -193,7 +193,7 @@ DynamicLoaderWindowsDYLD::GetStepThroughTrampolinePlan(Thread &thread,
   AddressRange range(pc, 2 * 15);
 
   DisassemblerSP disassembler_sp = Disassembler::DisassembleRange(
-      arch, nullptr, nullptr, m_process->GetTarget(), range, true);
+      arch, nullptr, nullptr, m_process->GetTarget(), range);
   if (!disassembler_sp) {
     return ThreadPlanSP();
   }
diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
index d4cb726fc7e5..c0ec43158454 100644
--- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
@@ -1018,8 +1018,9 @@ bool EmulateInstructionMIPS::SetInstruction(const Opcode &insn_opcode,
 
       const size_t bytes_read =
           target->ReadMemory(next_addr, /* Address of next instruction */
-                             true,      /* prefer_file_cache */
-                             buf, sizeof(uint32_t), error, &load_addr);
+                             buf, sizeof(uint32_t), error, 
+                             false,  /* force_live_memory */
+                             &load_addr);
 
       if (bytes_read == 0)
         return true;
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index ae432ac89eaa..f30ed427f853 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -859,8 +859,7 @@ Address ObjectFileELF::GetImageInfoAddress(Target *target) {
       if (symbol.d_tag == DT_MIPS_RLD_MAP) {
         // DT_MIPS_RLD_MAP tag stores an absolute address of the debug pointer.
         Address addr;
-        if (target->ReadPointerFromMemory(dyn_base + offset, false, error,
-                                          addr))
+        if (target->ReadPointerFromMemory(dyn_base + offset, error, addr, true))
           return addr;
       }
       if (symbol.d_tag == DT_MIPS_RLD_MAP_REL) {
@@ -868,7 +867,7 @@ Address ObjectFileELF::GetImageInfoAddress(Target *target) {
         // relative to the address of the tag.
         uint64_t rel_offset;
         rel_offset = target->ReadUnsignedIntegerFromMemory(
-            dyn_base + offset, false, GetAddressByteSize(), UINT64_MAX, error);
+            dyn_base + offset, GetAddressByteSize(), UINT64_MAX, error, true);
         if (error.Success() && rel_offset != UINT64_MAX) {
           Address addr;
           addr_t debug_ptr_address =
diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
index 1bc071c2b161..65947c5f833b 100644
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -38,10 +38,10 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly(
   ProcessSP process_sp(thread.GetProcess());
   if (process_sp) {
     Status error;
-    const bool prefer_file_cache = true;
+    const bool force_live_memory = true;
     if (process_sp->GetTarget().ReadMemory(
-            range.GetBaseAddress(), prefer_file_cache, function_text.data(),
-            range.GetByteSize(), error) != range.GetByteSize()) {
+            range.GetBaseAddress(), function_text.data(), range.GetByteSize(),
+            error, force_live_memory) != range.GetByteSize()) {
       return false;
     }
   }
diff --git a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
index fe1275d5b0cf..402a70cd025f 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
@@ -51,12 +51,11 @@ bool UnwindAssembly_x86::GetNonCallSiteUnwindPlanFromAssembly(
   ProcessSP process_sp(thread.GetProcess());
   if (process_sp.get() == nullptr)
     return false;
-  const bool prefer_file_cache = true;
   std::vector<uint8_t> function_text(func.GetByteSize());
   Status error;
   if (process_sp->GetTarget().ReadMemory(
-          func.GetBaseAddress(), prefer_file_cache, function_text.data(),
-          func.GetByteSize(), error) == func.GetByteSize()) {
+          func.GetBaseAddress(), function_text.data(), func.GetByteSize(),
+          error) == func.GetByteSize()) {
     RegisterContextSP reg_ctx(thread.GetRegisterContext());
     m_assembly_inspection_engine->Initialize(reg_ctx);
     return m_assembly_inspection_engine->GetNonCallSiteUnwindPlanFromAssembly(
@@ -153,12 +152,11 @@ bool UnwindAssembly_x86::AugmentUnwindPlanFromCallSite(
       return false;
     if (m_assembly_inspection_engine == nullptr)
       return false;
-    const bool prefer_file_cache = true;
     std::vector<uint8_t> function_text(func.GetByteSize());
     Status error;
     if (process_sp->GetTarget().ReadMemory(
-            func.GetBaseAddress(), prefer_file_cache, function_text.data(),
-            func.GetByteSize(), error) == func.GetByteSize()) {
+            func.GetBaseAddress(), function_text.data(), func.GetByteSize(),
+            error) == func.GetByteSize()) {
       RegisterContextSP reg_ctx(thread.GetRegisterContext());
       m_assembly_inspection_engine->Initialize(reg_ctx);
       return m_assembly_inspection_engine->AugmentUnwindPlanFromCallSite(
@@ -185,10 +183,9 @@ bool UnwindAssembly_x86::GetFastUnwindPlan(AddressRange &func, Thread &thread,
   ProcessSP process_sp = thread.GetProcess();
   if (process_sp) {
     Target &target(process_sp->GetTarget());
-    const bool prefer_file_cache = true;
     Status error;
-    if (target.ReadMemory(func.GetBaseAddress(), prefer_file_cache,
-                          opcode_data.data(), 4, error) == 4) {
+    if (target.ReadMemory(func.GetBaseAddress(), opcode_data.data(), 4,
+                          error) == 4) {
       uint8_t i386_push_mov[] = {0x55, 0x89, 0xe5};
       uint8_t x86_64_push_mov[] = {0x55, 0x48, 0x89, 0xe5};
 
@@ -220,12 +217,10 @@ bool UnwindAssembly_x86::FirstNonPrologueInsn(
   if (m_assembly_inspection_engine == nullptr)
     return false;
 
-  const bool prefer_file_cache = true;
   std::vector<uint8_t> function_text(func.GetByteSize());
   Status error;
-  if (target->ReadMemory(func.GetBaseAddress(), prefer_file_cache,
-                         function_text.data(), func.GetByteSize(),
-                         error) == func.GetByteSize()) {
+  if (target->ReadMemory(func.GetBaseAddress(), function_text.data(),
+                         func.GetByteSize(), error) == func.GetByteSize()) {
     size_t offset;
     if (m_assembly_inspection_engine->FindFirstNonPrologueInstruction(
             function_text.data(), func.GetByteSize(), offset)) {
diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
index 67013f6dd8b1..0aeb7597cfcf 100644
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -426,17 +426,16 @@ lldb::DisassemblerSP Function::GetInstructions(const ExecutionContext &exe_ctx,
                                                bool prefer_file_cache) {
   ModuleSP module_sp(GetAddressRange().GetBaseAddress().GetModule());
   if (module_sp && exe_ctx.HasTargetScope()) {
-    const bool prefer_file_cache = false;
     return Disassembler::DisassembleRange(module_sp->GetArchitecture(), nullptr,
                                           flavor, exe_ctx.GetTargetRef(),
-                                          GetAddressRange(), prefer_file_cache);
+                                          GetAddressRange(), !prefer_file_cache);
   }
   return lldb::DisassemblerSP();
 }
 
 bool Function::GetDisassembly(const ExecutionContext &exe_ctx,
-                              const char *flavor, bool prefer_file_cache,
-                              Stream &strm) {
+                              const char *flavor, Stream &strm,
+                              bool prefer_file_cache) {
   lldb::DisassemblerSP disassembler_sp =
       GetInstructions(exe_ctx, flavor, prefer_file_cache);
   if (disassembler_sp) {
diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp
index 8d099e0cc7e1..e6695c5008ea 100644
--- a/lldb/source/Symbol/Symbol.cpp
+++ b/lldb/source/Symbol/Symbol.cpp
@@ -542,10 +542,9 @@ lldb::DisassemblerSP Symbol::GetInstructions(const ExecutionContext &exe_ctx,
                                              bool prefer_file_cache) {
   ModuleSP module_sp(m_addr_range.GetBaseAddress().GetModule());
   if (module_sp && exe_ctx.HasTargetScope()) {
-    const bool prefer_file_cache = false;
     return Disassembler::DisassembleRange(module_sp->GetArchitecture(), nullptr,
                                           flavor, exe_ctx.GetTargetRef(),
-                                          m_addr_range, prefer_file_cache);
+                                          m_addr_range, !prefer_file_cache);
   }
   return lldb::DisassemblerSP();
 }
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 9f39e78e5d72..5af8567733e3 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -5810,10 +5810,8 @@ Process::AdvanceAddressToNextBranchInstruction(Address default_stop_addr,
 
   const char *plugin_name = nullptr;
   const char *flavor = nullptr;
-  const bool prefer_file_cache = true;
   disassembler_sp = Disassembler::DisassembleRange(
-      target.GetArchitecture(), plugin_name, flavor, GetTarget(), range_bounds,
-      prefer_file_cache);
+      target.GetArchitecture(), plugin_name, flavor, GetTarget(), range_bounds);
   if (disassembler_sp)
     insn_list = &disassembler_sp->GetInstructionList();
 
diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp
index 5402ba2626b1..cba51d266c5b 100644
--- a/lldb/source/Target/StackFrame.cpp
+++ b/lldb/source/Target/StackFrame.cpp
@@ -1294,11 +1294,11 @@ lldb::ValueObjectSP StackFrame::GuessValueForAddress(lldb::addr_t addr) {
 
   const char *plugin_name = nullptr;
   const char *flavor = nullptr;
-  const bool prefer_file_cache = false;
+  const bool force_live_memory = true;
 
   DisassemblerSP disassembler_sp =
       Disassembler::DisassembleRange(target_arch, plugin_name, flavor,
-                                     *target_sp, pc_range, prefer_file_cache);
+                                     *target_sp, pc_range, force_live_memory);
 
   if (!disassembler_sp || !disassembler_sp->GetInstructionList().GetSize()) {
     return ValueObjectSP();
@@ -1674,10 +1674,10 @@ lldb::ValueObjectSP StackFrame::GuessValueForRegisterAndOffset(ConstString reg,
 
   const char *plugin_name = nullptr;
   const char *flavor = nullptr;
-  const bool prefer_file_cache = false;
+  const bool force_live_memory = true;
   DisassemblerSP disassembler_sp =
       Disassembler::DisassembleRange(target_arch, plugin_name, flavor,
-                                     *target_sp, pc_range, prefer_file_cache);
+                                     *target_sp, pc_range, force_live_memory);
 
   if (!disassembler_sp || !disassembler_sp->GetInstructionList().GetSize()) {
     return ValueObjectSP();
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index 3aa1d30cb776..177efa8f8f28 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -1717,8 +1717,8 @@ size_t Target::ReadMemoryFromFileCache(const Address &addr, void *dst,
   return 0;
 }
 
-size_t Target::ReadMemory(const Address &addr, bool prefer_file_cache,
-                          void *dst, size_t dst_len, Status &error,
+size_t Target::ReadMemory(const Address &addr, void *dst, size_t dst_len,
+                          Status &error, bool force_live_memory,
                           lldb::addr_t *load_addr_ptr) {
   error.Clear();
 
@@ -1753,10 +1753,20 @@ size_t Target::ReadMemory(const Address &addr, bool prefer_file_cache,
   if (!resolved_addr.IsValid())
     resolved_addr = addr;
 
-  if (prefer_file_cache) {
-    bytes_read = ReadMemoryFromFileCache(resolved_addr, dst, dst_len, error);
-    if (bytes_read > 0)
-      return bytes_read;
+  bool is_readonly = false;
+  // Prefer the file cache when the section is readable but not writable.
+  if (!force_live_memory && resolved_addr.IsSectionOffset()) {
+    SectionSP section_sp(addr.GetSection());
+    if (section_sp) {
+      auto permissions = Flags(section_sp->GetPermissions());
+      is_readonly = !permissions.Test(ePermissionsWritable) &&
+                    permissions.Test(ePermissionsReadable);
+    }
+    if (is_readonly) {
+      bytes_read = ReadMemoryFromFileCache(resolved_addr, dst, dst_len, error);
+      if (bytes_read > 0)
+        return bytes_read;
+    }
   }
 
   if (ProcessIsValid()) {
@@ -1791,17 +1801,10 @@ size_t Target::ReadMemory(const Address &addr, bool prefer_file_cache,
           *load_addr_ptr = load_addr;
         return bytes_read;
       }
-      // If the address is not section offset we have an address that doesn't
-      // resolve to any address in any currently loaded shared libraries and we
-      // failed to read memory so there isn't anything more we can do. If it is
-      // section offset, we might be able to read cached memory from the object
-      // file.
-      if (!resolved_addr.IsSectionOffset())
-        return 0;
     }
   }
 
-  if (!prefer_file_cache && resolved_addr.IsSectionOffset()) {
+  if (!is_readonly && resolved_addr.IsSectionOffset()) {
     // If we didn't already try and read from the object file cache, then try
     // it after failing to read from the process.
     return ReadMemoryFromFileCache(resolved_addr, dst, dst_len, error);
@@ -1856,7 +1859,7 @@ size_t Target::ReadCStringFromMemory(const Address &addr, char *dst,
       addr_t bytes_to_read =
           std::min<addr_t>(bytes_left, cache_line_bytes_left);
       size_t bytes_read =
-          ReadMemory(address, false, curr_dst, bytes_to_read, error);
+          ReadMemory(address, curr_dst, bytes_to_read, error, true);
 
       if (bytes_read == 0) {
         result_error = error;
@@ -1884,15 +1887,15 @@ size_t Target::ReadCStringFromMemory(const Address &addr, char *dst,
   return total_cstr_len;
 }
 
-size_t Target::ReadScalarIntegerFromMemory(const Address &addr,
-                                           bool prefer_file_cache,
-                                           uint32_t byte_size, bool is_signed,
-                                           Scalar &scalar, Status &error) {
+size_t Target::ReadScalarIntegerFromMemory(const Address &addr, uint32_t byte_size,
+                                           bool is_signed, Scalar &scalar,
+                                           Status &error,
+                                           bool force_live_memory) {
   uint64_t uval;
 
   if (byte_size <= sizeof(uval)) {
     size_t bytes_read =
-        ReadMemory(addr, prefer_file_cache, &uval, byte_size, error);
+        ReadMemory(addr, &uval, byte_size, error, force_live_memory);
     if (bytes_read == byte_size) {
       DataExtractor data(&uval, sizeof(uval), m_arch.GetSpec().GetByteOrder(),
                          m_arch.GetSpec().GetAddressByteSize());
@@ -1914,23 +1917,22 @@ size_t Target::ReadScalarIntegerFromMemory(const Address &addr,
 }
 
 uint64_t Target::ReadUnsignedIntegerFromMemory(const Address &addr,
-                                               bool prefer_file_cache,
                                                size_t integer_byte_size,
-                                               uint64_t fail_value,
-                                               Status &error) {
+                                               uint64_t fail_value, Status &error,
+                                               bool force_live_memory) {
   Scalar scalar;
-  if (ReadScalarIntegerFromMemory(addr, prefer_file_cache, integer_byte_size,
-                                  false, scalar, error))
+  if (ReadScalarIntegerFromMemory(addr, integer_byte_size, false, scalar, error,
+                                  force_live_memory))
     return scalar.ULongLong(fail_value);
   return fail_value;
 }
 
-bool Target::ReadPointerFromMemory(const Address &addr, bool prefer_file_cache,
-                                   Status &error, Address &pointer_addr) {
+bool Target::ReadPointerFromMemory(const Address &addr, Status &error,
+                                   Address &pointer_addr,
+                                   bool force_live_memory) {
   Scalar scalar;
-  if (ReadScalarIntegerFromMemory(addr, prefer_file_cache,
-                                  m_arch.GetSpec().GetAddressByteSize(), false,
-                                  scalar, error)) {
+  if (ReadScalarIntegerFromMemory(addr, m_arch.GetSpec().GetAddressByteSize(),
+                                  false, scalar, error, force_live_memory)) {
     addr_t pointer_vm_addr = scalar.ULongLong(LLDB_INVALID_ADDRESS);
     if (pointer_vm_addr != LLDB_INVALID_ADDRESS) {
       SectionLoadList &section_load_list = GetSectionLoadList();
diff --git a/lldb/source/Target/ThreadPlanStepRange.cpp b/lldb/source/Target/ThreadPlanStepRange.cpp
index 3c42cd750dad..896e647bbb52 100644
--- a/lldb/source/Target/ThreadPlanStepRange.cpp
+++ b/lldb/source/Target/ThreadPlanStepRange.cpp
@@ -264,10 +264,9 @@ InstructionList *ThreadPlanStepRange::GetInstructionsForAddress(
         // Disassemble the address range given:
         const char *plugin_name = nullptr;
         const char *flavor = nullptr;
-        const bool prefer_file_cache = true;
         m_instruction_ranges[i] = Disassembler::DisassembleRange(
             GetTarget().GetArchitecture(), plugin_name, flavor, GetTarget(),
-            m_address_ranges[i], prefer_file_cache);
+            m_address_ranges[i]);
       }
       if (!m_instruction_ranges[i])
         return nullptr;
diff --git a/lldb/source/Target/Trace.cpp b/lldb/source/Target/Trace.cpp
index c2de3dac48b2..7f58dbc70039 100644
--- a/lldb/source/Target/Trace.cpp
+++ b/lldb/source/Target/Trace.cpp
@@ -200,7 +200,7 @@ DumpInstructionInfo(Stream &s, const SymbolContext &sc,
   // Now we try using the current function's disassembler
   if (sc.function) {
     DisassemblerSP disassembler =
-        sc.function->GetInstructions(exe_ctx, nullptr, true);
+        sc.function->GetInstructions(exe_ctx, nullptr);
     if (TryDumpInstructionInfo(s, disassembler, exe_ctx, address))
       return disassembler;
   }
@@ -209,9 +209,9 @@ DumpInstructionInfo(Stream &s, const SymbolContext &sc,
   Target &target = exe_ctx.GetTargetRef();
   const ArchSpec &arch = target.GetArchitecture();
   AddressRange range(address, arch.GetMaximumOpcodeByteSize() * 1);
-  DisassemblerSP disassembler = Disassembler::DisassembleRange(
-      arch, /*plugin_name*/ nullptr,
-      /*flavor*/ nullptr, target, range, /*prefer_file_cache*/ true);
+  DisassemblerSP disassembler =
+      Disassembler::DisassembleRange(arch, /*plugin_name*/ nullptr,
+                                     /*flavor*/ nullptr, target, range);
   if (TryDumpInstructionInfo(s, disassembler, exe_ctx, address))
     return disassembler;
   return nullptr;

From 06995fe256ec5a80092d5045c30b2c94f5ed8232 Mon Sep 17 00:00:00 2001
From: Ben Shi <powerman1st@163.com>
Date: Sat, 17 Apr 2021 07:17:34 +0800
Subject: [PATCH 11/52] [clang][NFC] Fix a potential assert failure

Reviewed By: MaskRay, craig.topper

Differential Revision: https://reviews.llvm.org/D100616
---
 clang/include/clang/Basic/TargetBuiltins.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 0d99ffc8ffce..7fa688633ff6 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -331,9 +331,9 @@ namespace clang {
   }
 
   static constexpr uint64_t LargestBuiltinID = std::max<uint64_t>(
-      {NEON::FirstTSBuiltin, ARM::LastTSBuiltin, SVE::FirstTSBuiltin,
-       AArch64::LastTSBuiltin, BPF::LastTSBuiltin, PPC::LastTSBuiltin,
-       NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, X86::LastTSBuiltin,
+      {ARM::LastTSBuiltin, AArch64::LastTSBuiltin, BPF::LastTSBuiltin,
+       PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin,
+       X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin,
        Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin,
        Le64::LastTSBuiltin, SystemZ::LastTSBuiltin,
        WebAssembly::LastTSBuiltin});

From ff769dd11128839e00eea546f7e68680d9acfd77 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Fri, 16 Apr 2021 18:51:07 -0500
Subject: [PATCH 12/52] [PowerPC] Minor improvement for insert_vector_elt
 codegen

For v2f64, all VSX subtargets can insert an element with a single
XXPERMDI.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  10 +-
 llvm/lib/Target/PowerPC/PPCInstrVSX.td        |   8 +
 llvm/test/CodeGen/PowerPC/swaps-le-6.ll       | 147 ++++++++++--------
 .../CodeGen/PowerPC/vsx_insert_extract_le.ll  |  38 ++++-
 4 files changed, 127 insertions(+), 76 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 48dba751a230..d4efb2ba6651 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -878,6 +878,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     if (Subtarget.hasVSX()) {
       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
     }
 
     if (Subtarget.hasP8Altivec())
@@ -1247,10 +1248,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
     }
 
-    if (Subtarget.isISA3_1()) {
+    if (Subtarget.isISA3_1())
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
-      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
-    }
   }
 
   if (Subtarget.pairedVectorMemops()) {
@@ -10341,6 +10340,9 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   SDValue V2 = Op.getOperand(1);
   SDValue V3 = Op.getOperand(2);
 
+  if (VT == MVT::v2f64 && C)
+    return Op;
+
   if (Subtarget.isISA3_1()) {
     // On P10, we have legal lowering for constant and variable indices for
     // integer vectors.
@@ -10353,7 +10355,7 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
     if (VT == MVT::v4f32 || VT == MVT::v2f64) {
       if (!C || (VT == MVT::v4f32 && dyn_cast<LoadSDNode>(V2)))
         return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
-      return SDValue();
+      return Op;
     }
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 471ab32f8778..869e06c49365 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2907,6 +2907,10 @@ def : Pat<WToDPExtractConv.BV02U,
           (v2f64 (XVCVUXWDP $A))>;
 def : Pat<WToDPExtractConv.BV13U,
           (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
+          (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
+          (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
 } // HasVSX, IsBigEndian
 
 // Any little endian VSX subtarget.
@@ -3012,6 +3016,10 @@ def : Pat<WToDPExtractConv.BV02U,
           (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
 def : Pat<WToDPExtractConv.BV13U,
           (v2f64 (XVCVUXWDP $A))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
+          (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
+          (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
 } // HasVSX, IsLittleEndian
 
 // Any pre-Power9 VSX subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
index 4437e6799269..e3934ed2a031 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
 ; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -O3 < %s | FileCheck %s
@@ -21,41 +22,48 @@
 
 define void @bar0() {
 ; CHECK-LABEL: bar0:
-; CHECK:   # %bb.0: # %entry
-; CHECK:     addis r3, r2, .LC0@toc@ha
-; CHECK:     addis r4, r2, .LC1@toc@ha
-; CHECK:     ld r3, .LC0@toc@l(r3)
-; CHECK:     addis r3, r2, .LC2@toc@ha
-; CHECK:     ld r3, .LC2@toc@l(r3)
-; CHECK:     xxmrgld vs0, vs0, vs1
-; CHECK:     stxvd2x vs0, 0, r3
-; CHECK:     blr
-;
-; CHECK-P9-NOVECTOR-LABEL: bar0:
-; CHECK-P9-NOVECTOR:   # %bb.0: # %entry
-; CHECK-P9-NOVECTOR:     addis r3, r2, .LC0@toc@ha
-; CHECK-P9-NOVECTOR:     ld r3, .LC0@toc@l(r3)
-; CHECK-P9-NOVECTOR:     addis r3, r2, .LC1@toc@ha
-; CHECK-P9-NOVECTOR:     addis r3, r2, .LC2@toc@ha
-; CHECK-P9-NOVECTOR:     ld r3, .LC2@toc@l(r3)
-; CHECK-P9-NOVECTOR:     xxmrgld vs0, vs1, vs0
-; CHECK-P9-NOVECTOR:     stxvd2x vs0, 0, r3
-; CHECK-P9-NOVECTOR:     blr
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    addis r4, r2, .LC1@toc@ha
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    lfdx f0, 0, r3
+; CHECK-NEXT:    ld r3, .LC1@toc@l(r4)
+; CHECK-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    addis r3, r2, .LC2@toc@ha
+; CHECK-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-NEXT:    xxmrgld vs0, vs0, vs1
+; CHECK-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: bar0:
-; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9:     addis r3, r2, .LC0@toc@ha
-; CHECK-P9:     ld r3, .LC0@toc@l(r3)
-; CHECK-P9:     lxvx vs0, 0, r3
-; CHECK-P9:     addis r3, r2, .LC1@toc@ha
-; CHECK-P9:     ld r3, .LC1@toc@l(r3)
-; CHECK-P9:     lfd f1, 0(r3)
-; CHECK-P9:     addis r3, r2, .LC2@toc@ha
-; CHECK-P9:     ld r3, .LC2@toc@l(r3)
-; CHECK-P9:     xxswapd vs1, f1
-; CHECK-P9:     xxpermdi vs0, vs0, vs1, 1
-; CHECK-P9:     stxvx vs0, 0, r3
-; CHECK-P9:     blr
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NEXT:    lxvx vs0, 0, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NEXT:    ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NEXT:    lfd f1, 0(r3)
+; CHECK-P9-NEXT:    addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT:    stxvx vs0, 0, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOVECTOR-LABEL: bar0:
+; CHECK-P9-NOVECTOR:       # %bb.0: # %entry
+; CHECK-P9-NOVECTOR-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR-NEXT:    ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT:    lfdx f1, 0, r3
+; CHECK-P9-NOVECTOR-NEXT:    addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NOVECTOR-NEXT:    xxmrgld vs0, vs1, vs0
+; CHECK-P9-NOVECTOR-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT:    blr
 entry:
   %0 = load <2 x double>, <2 x double>* @x, align 16
   %1 = load double, double* @y, align 8
@@ -66,41 +74,48 @@ entry:
 
 define void @bar1() {
 ; CHECK-LABEL: bar1:
-; CHECK:   # %bb.0: # %entry
-; CHECK:     addis r3, r2, .LC0@toc@ha
-; CHECK:     addis r4, r2, .LC1@toc@ha
-; CHECK:     ld r3, .LC0@toc@l(r3)
-; CHECK:     addis r3, r2, .LC2@toc@ha
-; CHECK:     ld r3, .LC2@toc@l(r3)
-; CHECK:     xxpermdi vs0, vs1, vs0, 1
-; CHECK:     stxvd2x vs0, 0, r3
-; CHECK:     blr
-;
-; CHECK-P9-NOVECTOR-LABEL: bar1:
-; CHECK-P9-NOVECTOR:   # %bb.0: # %entry
-; CHECK-P9-NOVECTOR:     addis r3, r2, .LC0@toc@ha
-; CHECK-P9-NOVECTOR:     ld r3, .LC0@toc@l(r3)
-; CHECK-P9-NOVECTOR:     addis r3, r2, .LC1@toc@ha
-; CHECK-P9-NOVECTOR:     addis r3, r2, .LC2@toc@ha
-; CHECK-P9-NOVECTOR:     ld r3, .LC2@toc@l(r3)
-; CHECK-P9-NOVECTOR:     xxpermdi vs0, vs0, vs1, 1
-; CHECK-P9-NOVECTOR:     stxvd2x vs0, 0, r3
-; CHECK-P9-NOVECTOR:     blr
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    addis r4, r2, .LC1@toc@ha
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    lfdx f0, 0, r3
+; CHECK-NEXT:    ld r3, .LC1@toc@l(r4)
+; CHECK-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    addis r3, r2, .LC2@toc@ha
+; CHECK-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-NEXT:    xxpermdi vs0, vs1, vs0, 1
+; CHECK-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: bar1:
-; CHECK-P9:   # %bb.0: # %entry
-; CHECK-P9:     addis r3, r2, .LC0@toc@ha
-; CHECK-P9:     ld r3, .LC0@toc@l(r3)
-; CHECK-P9:     lxvx vs0, 0, r3
-; CHECK-P9:     addis r3, r2, .LC1@toc@ha
-; CHECK-P9:     ld r3, .LC1@toc@l(r3)
-; CHECK-P9:     lfd f1, 0(r3)
-; CHECK-P9:     addis r3, r2, .LC2@toc@ha
-; CHECK-P9:     ld r3, .LC2@toc@l(r3)
-; CHECK-P9:     xxswapd vs1, f1
-; CHECK-P9:     xxmrgld vs0, vs1, vs0
-; CHECK-P9:     stxvx vs0, 0, r3
-; CHECK-P9:     blr
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NEXT:    lxvx vs0, 0, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NEXT:    ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NEXT:    lfd f1, 0(r3)
+; CHECK-P9-NEXT:    addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NEXT:    xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9-NEXT:    stxvx vs0, 0, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-NOVECTOR-LABEL: bar1:
+; CHECK-P9-NOVECTOR:       # %bb.0: # %entry
+; CHECK-P9-NOVECTOR-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR-NEXT:    ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT:    lfdx f1, 0, r3
+; CHECK-P9-NOVECTOR-NEXT:    addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NOVECTOR-NEXT:    xxpermdi vs0, vs0, vs1, 1
+; CHECK-P9-NOVECTOR-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT:    blr
 entry:
   %0 = load <2 x double>, <2 x double>* @x, align 16
   %1 = load double, double* @y, align 8
diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index a198604f79a4..331d7864a228 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -3,6 +3,10 @@
 ; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
 ; RUN:   | FileCheck %s
 
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-P8-BE
+
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \
 ; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
 ; RUN:   | FileCheck --check-prefix=CHECK-P9-VECTOR %s
@@ -20,6 +24,13 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
 ; CHECK-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-NEXT:    blr
 ;
+; CHECK-P8-BE-LABEL: testi0:
+; CHECK-P8-BE:       # %bb.0:
+; CHECK-P8-BE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-BE-NEXT:    lfdx f1, 0, r4
+; CHECK-P8-BE-NEXT:    xxpermdi v2, vs1, vs0, 1
+; CHECK-P8-BE-NEXT:    blr
+;
 ; CHECK-P9-VECTOR-LABEL: testi0:
 ; CHECK-P9-VECTOR:       # %bb.0:
 ; CHECK-P9-VECTOR-NEXT:    lxvd2x vs0, 0, r3
@@ -30,10 +41,9 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
 ;
 ; CHECK-P9-LABEL: testi0:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    lfd f1, 0(r4)
 ; CHECK-P9-NEXT:    lxv vs0, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs1, f1
-; CHECK-P9-NEXT:    xxpermdi v2, vs0, vs1, 1
+; CHECK-P9-NEXT:    lfd f1, 0(r4)
+; CHECK-P9-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %s = load double, double* %p2
@@ -52,6 +62,13 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
 ; CHECK-NEXT:    xxpermdi v2, vs1, vs0, 1
 ; CHECK-NEXT:    blr
 ;
+; CHECK-P8-BE-LABEL: testi1:
+; CHECK-P8-BE:       # %bb.0:
+; CHECK-P8-BE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-BE-NEXT:    lfdx f1, 0, r4
+; CHECK-P8-BE-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-P8-BE-NEXT:    blr
+;
 ; CHECK-P9-VECTOR-LABEL: testi1:
 ; CHECK-P9-VECTOR:       # %bb.0:
 ; CHECK-P9-VECTOR-NEXT:    lxvd2x vs0, 0, r3
@@ -62,10 +79,9 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
 ;
 ; CHECK-P9-LABEL: testi1:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    lfd f1, 0(r4)
 ; CHECK-P9-NEXT:    lxv vs0, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs1, f1
-; CHECK-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P9-NEXT:    lfd f1, 0(r4)
+; CHECK-P9-NEXT:    xxpermdi v2, vs1, vs0, 1
 ; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %s = load double, double* %p2
@@ -82,6 +98,11 @@ define double @teste0(<2 x double>* %p1) {
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
+; CHECK-P8-BE-LABEL: teste0:
+; CHECK-P8-BE:       # %bb.0:
+; CHECK-P8-BE-NEXT:    lfdx f1, 0, r3
+; CHECK-P8-BE-NEXT:    blr
+;
 ; CHECK-P9-VECTOR-LABEL: teste0:
 ; CHECK-P9-VECTOR:       # %bb.0:
 ; CHECK-P9-VECTOR-NEXT:    lxvd2x vs1, 0, r3
@@ -107,6 +128,11 @@ define double @teste1(<2 x double>* %p1) {
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
+; CHECK-P8-BE-LABEL: teste1:
+; CHECK-P8-BE:       # %bb.0:
+; CHECK-P8-BE-NEXT:    lfd f1, 8(r3)
+; CHECK-P8-BE-NEXT:    blr
+;
 ; CHECK-P9-VECTOR-LABEL: teste1:
 ; CHECK-P9-VECTOR:       # %bb.0:
 ; CHECK-P9-VECTOR-NEXT:    lxvd2x vs0, 0, r3

From 1206b95e0703dc0a9b619a095d5564ac51c39d19 Mon Sep 17 00:00:00 2001
From: Ben Barham <ben_barham@apple.com>
Date: Fri, 16 Apr 2021 17:55:39 -0700
Subject: [PATCH 13/52] [ASTReader] Only mark module out of date if not already
 compiled

If a module contains errors (i.e. it was built with
-fallow-pcm-with-compiler-errors and had errors) and was from the module
cache, it is marked as out of date - see
a2c1054c303f20be006e9ef20739dbb88bd9ae02.

When a module is imported multiple times in one compile, this caused
it to be recompiled each time - removing the existing buffer from the
module cache and replacing it. This results in various errors further
down the line.

Instead, only mark the module as out of date if it isn't already
finalized in the module cache.

Reviewed By: akyrtzi

Differential Revision: https://reviews.llvm.org/D100619
---
 clang/lib/Serialization/ASTReader.cpp         |  7 +-
 clang/test/Modules/Inputs/error/error.h       |  2 +
 .../Modules/Inputs/error/module.modulemap     | 10 +++
 clang/test/Modules/Inputs/error/use_error_a.h |  3 +
 clang/test/Modules/Inputs/error/use_error_b.h |  3 +
 clang/test/Modules/load-module-with-errors.m  | 68 +++++++++++++------
 6 files changed, 71 insertions(+), 22 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/error/use_error_a.h
 create mode 100644 clang/test/Modules/Inputs/error/use_error_b.h

diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 72bb125397db..88fb35aae1b8 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2760,9 +2760,10 @@ ASTReader::ReadControlBlock(ModuleFile &F,
 
       bool hasErrors = Record[6];
       if (hasErrors && !DisableValidation) {
-        // If requested by the caller, mark modules on error as out-of-date.
-        if (F.Kind == MK_ImplicitModule &&
-            (ClientLoadCapabilities & ARR_TreatModuleWithErrorsAsOutOfDate))
+        // If requested by the caller and the module hasn't already been read
+        // or compiled, mark modules on error as out-of-date.
+        if ((ClientLoadCapabilities & ARR_TreatModuleWithErrorsAsOutOfDate) &&
+            !ModuleMgr.getModuleCache().isPCMFinal(F.FileName))
           return OutOfDate;
 
         if (!AllowASTWithCompilerErrors) {
diff --git a/clang/test/Modules/Inputs/error/error.h b/clang/test/Modules/Inputs/error/error.h
index 1b27b21dfd63..fcdb408a4534 100644
--- a/clang/test/Modules/Inputs/error/error.h
+++ b/clang/test/Modules/Inputs/error/error.h
@@ -1,3 +1,5 @@
+#pragma mark mark
+
 @import undefined;
 
 @interface Error
diff --git a/clang/test/Modules/Inputs/error/module.modulemap b/clang/test/Modules/Inputs/error/module.modulemap
index e18239af113b..512e7ed6529e 100644
--- a/clang/test/Modules/Inputs/error/module.modulemap
+++ b/clang/test/Modules/Inputs/error/module.modulemap
@@ -1,3 +1,13 @@
 module error {
   header "error.h"
 }
+
+module use_error_a {
+  header "use_error_a.h"
+  export error
+}
+
+module use_error_b {
+  header "use_error_b.h"
+  export error
+}
diff --git a/clang/test/Modules/Inputs/error/use_error_a.h b/clang/test/Modules/Inputs/error/use_error_a.h
new file mode 100644
index 000000000000..1949c8346041
--- /dev/null
+++ b/clang/test/Modules/Inputs/error/use_error_a.h
@@ -0,0 +1,3 @@
+@import error;
+
+void funca(Error *a);
diff --git a/clang/test/Modules/Inputs/error/use_error_b.h b/clang/test/Modules/Inputs/error/use_error_b.h
new file mode 100644
index 000000000000..025acb7ccf31
--- /dev/null
+++ b/clang/test/Modules/Inputs/error/use_error_b.h
@@ -0,0 +1,3 @@
+@import error;
+
+void funcb(Error *b);
diff --git a/clang/test/Modules/load-module-with-errors.m b/clang/test/Modules/load-module-with-errors.m
index 3a951d2cdaa6..6991d0feb010 100644
--- a/clang/test/Modules/load-module-with-errors.m
+++ b/clang/test/Modules/load-module-with-errors.m
@@ -2,10 +2,13 @@
 // matter in this test.
 
 // pcherror-error@* {{PCH file contains compiler errors}}
-@import error; // notallowerror-error {{could not build module 'error'}}
+@import use_error_a; // notallowerror-error {{could not build module 'use_error_a'}}
+@import use_error_b;
 // expected-no-diagnostics
 
 void test(Error *x) {
+  funca(x);
+  funcb(x);
   [x method];
 }
 
@@ -16,7 +19,16 @@ void test(Error *x) {
 // RUN: %clang_cc1 -fmodules -fallow-pcm-with-compiler-errors \
 // RUN:   -fmodule-name=error -o %t/prebuilt/error.pcm \
 // RUN:   -x objective-c -emit-module %S/Inputs/error/module.modulemap
+// RUN: %clang_cc1 -fmodules -fallow-pcm-with-compiler-errors \
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-name=use_error_a -o %t/prebuilt/use_error_a.pcm \
+// RUN:   -x objective-c -emit-module %S/Inputs/error/module.modulemap
+// RUN: %clang_cc1 -fmodules -fallow-pcm-with-compiler-errors \
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-name=use_error_b -o %t/prebuilt/use_error_b.pcm \
+// RUN:   -x objective-c -emit-module %S/Inputs/error/module.modulemap
 
+// Prebuilt modules
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
 // RUN:   -fprebuilt-module-path=%t/prebuilt -fmodules-cache-path=%t \
 // RUN:   -ast-print %s | FileCheck %s
@@ -24,33 +36,49 @@ void test(Error *x) {
 // RUN:   -fprebuilt-module-path=%t/prebuilt -fmodules-cache-path=%t \
 // RUN:   -verify=pcherror %s
 
+// Explicit prebuilt modules (loaded when needed)
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
-// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -ast-print %s | FileCheck %s
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=use_error_a=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=use_error_b=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fsyntax-only -fmodules \
-// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -verify=pcherror %s
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=use_error_a=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=use_error_b=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t -verify=pcherror %s
 
+// Explicit prebuilt modules without name (always loaded)
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
-// RUN:   -fmodule-file=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -ast-print %s | FileCheck %s
+// RUN:   -fmodule-file=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t -ast-print %s | FileCheck %s
+// As the modules are always loaded, compiling will fail before even parsing
+// this file - this means that -verify can't be used, so do a grep instead.
 // RUN: not %clang_cc1 -fsyntax-only -fmodules \
-// RUN:   -fmodule-file=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -verify=pcherror %s
+// RUN:   -fmodule-file=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t 2>&1 | \
+// RUN: grep "PCH file contains compiler errors"
 
-// Shouldn't build the cached module (that has errors) when not allowing errors
+// Shouldn't build the cached modules (that have errors) when not allowing
+// errors
 // RUN: not %clang_cc1 -fsyntax-only -fmodules \
 // RUN:   -fmodules-cache-path=%t -fimplicit-module-maps -I %S/Inputs/error \
 // RUN:   -x objective-c %s
 // RUN: find %t -name "error-*.pcm" | not grep error
 
-// Should build the cached module when allowing errors
+// Should build the cached modules when allowing errors
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
 // RUN:   -fmodules-cache-path=%t -fimplicit-module-maps -I %S/Inputs/error \
 // RUN:   -x objective-c -verify %s
 // RUN: find %t -name "error-*.pcm" | grep error
+// RUN: find %t -name "use_error_a-*.pcm" | grep use_error_a
+// RUN: find %t -name "use_error_b-*.pcm" | grep use_error_b
 
-// Make sure there is still an error after the module is already in the cache
+// Check build when the modules are already cached
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
 // RUN:   -fmodules-cache-path=%t -fimplicit-module-maps -I %S/Inputs/error \
 // RUN:   -x objective-c -verify %s
@@ -59,7 +87,7 @@ void test(Error *x) {
 // the verify would fail as it would be the PCH error instead)
 // RUN: %clang_cc1 -fsyntax-only -fmodules \
 // RUN:   -fmodules-cache-path=%t -fimplicit-module-maps -I %S/Inputs/error \
-// RUN:   -x objective-c -verify=notallowerror %s
+// RUN:   -x objective-c  %s -verify=notallowerror
 
 // allow-pcm-with-compiler-errors should also allow errors in PCH
 // RUN: %clang_cc1 -fallow-pcm-with-compiler-errors -x objective-c \
@@ -71,15 +99,17 @@ void test(Error *x) {
 // CHECK-NEXT: @end
 // CHECK: void test(Error *x)
 
-// RUN: c-index-test -code-completion-at=%s:9:6 %s -fmodules -fmodules-cache-path=%t \
+// RUN: c-index-test -code-completion-at=%s:12:6 %s -fmodules -fmodules-cache-path=%t \
 // RUN:   -Xclang -fallow-pcm-with-compiler-errors -I %S/Inputs/error | FileCheck -check-prefix=COMPLETE %s
 // COMPLETE: ObjCInstanceMethodDecl:{ResultType int}{TypedText method}
 // COMPLETE: ObjCInstanceMethodDecl:{ResultType id}{TypedText method2}
 
 // RUN: c-index-test -test-load-source local %s -fmodules -fmodules-cache-path=%t \
 // RUN:   -Xclang -fallow-pcm-with-compiler-errors -I %S/Inputs/error | FileCheck -check-prefix=SOURCE %s
-// SOURCE: load-module-with-errors.m:8:6: FunctionDecl=test:8:6 (Definition) Extent=[8:1 - 10:2]
-// SOURCE: load-module-with-errors.m:8:18: ParmDecl=x:8:18 (Definition) Extent=[8:11 - 8:19]
-// SOURCE: load-module-with-errors.m:8:11: ObjCClassRef=Error:3:12 Extent=[8:11 - 8:16]
-// SOURCE: load-module-with-errors.m:8:21: CompoundStmt= Extent=[8:21 - 10:2]
-// SOURCE: load-module-with-errors.m:9:3: ObjCMessageExpr=method:4:8 Extent=[9:3 - 9:13]
+// SOURCE: load-module-with-errors.m:9:6: FunctionDecl=test:9:6 (Definition) Extent=[9:1 - 13:2]
+// SOURCE: load-module-with-errors.m:9:18: ParmDecl=x:9:18 (Definition) Extent=[9:11 - 9:19]
+// SOURCE: load-module-with-errors.m:9:11: ObjCClassRef=Error:5:12 Extent=[9:11 - 9:16]
+// SOURCE: load-module-with-errors.m:9:21: CompoundStmt= Extent=[9:21 - 13:2]
+// SOURCE: load-module-with-errors.m:10:3: CallExpr=funca:3:6 Extent=[10:3 - 10:11]
+// SOURCE: load-module-with-errors.m:11:3: CallExpr=funcb:3:6 Extent=[11:3 - 11:11]
+// SOURCE: load-module-with-errors.m:12:3: ObjCMessageExpr=method:6:8 Extent=[12:3 - 12:13]

From a623051dc3011fe6f41bddab4024fc213f60282b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 16 Apr 2021 17:45:40 -0700
Subject: [PATCH 14/52] [TableGen] Replace two SmallDenseSets with SmallSets.

The key here is HwMode indices. They're going to be small numbers,
contiguous, and only a few different values. I don't think we need
to go through the SmallDenseSet hashing.

A BitVector would be even better, but we don't have the upper
bound here.
---
 llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 43486880933c..4ab2f1c69911 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -111,7 +111,7 @@ bool TypeSetByHwMode::insert(const ValueTypeByHwMode &VVT) {
   bool ContainsDefault = false;
   MVT DT = MVT::Other;
 
-  SmallDenseSet<unsigned, 4> Modes;
+  SmallSet<unsigned, 4> Modes;
   for (const auto &P : VVT) {
     unsigned M = P.first;
     Modes.insert(M);
@@ -224,7 +224,7 @@ bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const {
   if (HaveDefault != VTSHaveDefault)
     return false;
 
-  SmallDenseSet<unsigned, 4> Modes;
+  SmallSet<unsigned, 4> Modes;
   for (auto &I : *this)
     Modes.insert(I.first);
   for (const auto &I : VTS)

From b2a3d31eed3b70baf2b8123d940cadb6139d8f2b Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Fri, 16 Apr 2021 19:08:23 -0700
Subject: [PATCH 15/52] [ELF] Simplify R_386_TLS_GD computation. NFC

---
 lld/ELF/InputSection.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 0ee1b0885d66..17fa7db15088 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -839,7 +839,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
   case R_TLSGD_GOT:
     return in.got->getGlobalDynOffset(sym) + a;
   case R_TLSGD_GOTPLT:
-    return in.got->getVA() + in.got->getGlobalDynOffset(sym) + a - in.gotPlt->getVA();
+    return in.got->getGlobalDynAddr(sym) + a - in.gotPlt->getVA();
   case R_TLSGD_PC:
     return in.got->getGlobalDynAddr(sym) + a - p;
   case R_TLSLD_GOTPLT:

From fb69b92c7b33b6a9b9be4278cd1e2d580f48ce8a Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy <sivachandra@google.com>
Date: Sat, 17 Apr 2021 05:06:13 +0000
Subject: [PATCH 16/52] [libc][NFC] Add common template test class for sqrt,
 sqrtf and sqrtl.

---
 libc/test/src/math/SqrtTest.h     | 73 +++++++++++++++++++++++++++++++
 libc/test/src/math/sqrt_test.cpp  | 62 +++-----------------------
 libc/test/src/math/sqrtf_test.cpp | 62 +++-----------------------
 libc/test/src/math/sqrtl_test.cpp | 62 +++-----------------------
 4 files changed, 88 insertions(+), 171 deletions(-)
 create mode 100644 libc/test/src/math/SqrtTest.h

diff --git a/libc/test/src/math/SqrtTest.h b/libc/test/src/math/SqrtTest.h
new file mode 100644
index 000000000000..56916a501329
--- /dev/null
+++ b/libc/test/src/math/SqrtTest.h
@@ -0,0 +1,73 @@
+//===-- Utility class to test sqrt[f|l] -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/Test.h"
+
+#include <math.h>
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+template <typename T> class SqrtTest : public __llvm_libc::testing::Test {
+
+  DECLARE_SPECIAL_CONSTANTS(T)
+
+  static constexpr UIntType HiddenBit =
+      UIntType(1) << __llvm_libc::fputil::MantissaWidth<T>::value;
+
+public:
+  typedef T (*SqrtFunc)(T);
+
+  void testSpecialNumbers(SqrtFunc func) {
+    ASSERT_FP_EQ(aNaN, func(aNaN));
+    ASSERT_FP_EQ(inf, func(inf));
+    ASSERT_FP_EQ(aNaN, func(negInf));
+    ASSERT_FP_EQ(0.0, func(0.0));
+    ASSERT_FP_EQ(-0.0, func(-0.0));
+    ASSERT_FP_EQ(aNaN, func(T(-1.0)));
+    ASSERT_FP_EQ(T(1.0), func(T(1.0)));
+    ASSERT_FP_EQ(T(2.0), func(T(4.0)));
+    ASSERT_FP_EQ(T(3.0), func(T(9.0)));
+  }
+
+  void testDenormalValues(SqrtFunc func) {
+    for (UIntType mant = 1; mant < HiddenBit; mant <<= 1) {
+      FPBits denormal(T(0.0));
+      denormal.encoding.mantissa = mant;
+
+      ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, T(denormal),
+                        func(denormal), T(0.5));
+    }
+
+    constexpr UIntType count = 1'000'001;
+    constexpr UIntType step = HiddenBit / count;
+    for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
+      T x = *reinterpret_cast<T *>(&v);
+      ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, func(x), 0.5);
+    }
+  }
+
+  void testNormalRange(SqrtFunc func) {
+    constexpr UIntType count = 10'000'001;
+    constexpr UIntType step = UIntType(-1) / count;
+    for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
+      T x = *reinterpret_cast<T *>(&v);
+      if (isnan(x) || (x < 0)) {
+        continue;
+      }
+      ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, func(x), 0.5);
+    }
+  }
+};
+
+#define LIST_SQRT_TESTS(T, func)                                               \
+  using LlvmLibcSqrtTest = SqrtTest<T>;                                        \
+  TEST_F(LlvmLibcSqrtTest, SpecialNumbers) { testSpecialNumbers(&func); }      \
+  TEST_F(LlvmLibcSqrtTest, DenormalValues) { testDenormalValues(&func); }      \
+  TEST_F(LlvmLibcSqrtTest, NormalRange) { testNormalRange(&func); }
diff --git a/libc/test/src/math/sqrt_test.cpp b/libc/test/src/math/sqrt_test.cpp
index 0be62d4e41ec..237264895bbd 100644
--- a/libc/test/src/math/sqrt_test.cpp
+++ b/libc/test/src/math/sqrt_test.cpp
@@ -1,65 +1,13 @@
-//===-- Unittests for sqrt -----------------------------------------------===//
+//===-- Unittests for sqrt ------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 
-#include "src/math/sqrt.h"
-#include "utils/FPUtil/FPBits.h"
-#include "utils/FPUtil/TestHelpers.h"
-#include "utils/MPFRWrapper/MPFRUtils.h"
-#include <math.h>
-
-using FPBits = __llvm_libc::fputil::FPBits<double>;
-using UIntType = typename FPBits::UIntType;
-
-namespace mpfr = __llvm_libc::testing::mpfr;
-
-constexpr UIntType HiddenBit =
-    UIntType(1) << __llvm_libc::fputil::MantissaWidth<double>::value;
-
-DECLARE_SPECIAL_CONSTANTS(double)
+#include "SqrtTest.h"
 
-TEST(LlvmLibcSqrtTest, SpecialValues) {
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrt(aNaN));
-  ASSERT_FP_EQ(inf, __llvm_libc::sqrt(inf));
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrt(negInf));
-  ASSERT_FP_EQ(0.0, __llvm_libc::sqrt(0.0));
-  ASSERT_FP_EQ(-0.0, __llvm_libc::sqrt(-0.0));
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrt(-1.0));
-  ASSERT_FP_EQ(1.0, __llvm_libc::sqrt(1.0));
-  ASSERT_FP_EQ(2.0, __llvm_libc::sqrt(4.0));
-  ASSERT_FP_EQ(3.0, __llvm_libc::sqrt(9.0));
-}
-
-TEST(LlvmLibcSqrtTest, DenormalValues) {
-  for (UIntType mant = 1; mant < HiddenBit; mant <<= 1) {
-    FPBits denormal(0.0);
-    denormal.encoding.mantissa = mant;
-
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, double(denormal),
-                      __llvm_libc::sqrt(denormal), 0.5);
-  }
-
-  constexpr UIntType count = 1'000'001;
-  constexpr UIntType step = HiddenBit / count;
-  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    double x = *reinterpret_cast<double *>(&v);
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, __llvm_libc::sqrt(x), 0.5);
-  }
-}
-
-TEST(LlvmLibcSqrtTest, InDoubleRange) {
-  constexpr UIntType count = 10'000'001;
-  constexpr UIntType step = UIntType(-1) / count;
-  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    double x = *reinterpret_cast<double *>(&v);
-    if (isnan(x) || (x < 0)) {
-      continue;
-    }
+#include "src/math/sqrt.h"
 
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, __llvm_libc::sqrt(x), 0.5);
-  }
-}
+LIST_SQRT_TESTS(double, __llvm_libc::sqrt)
diff --git a/libc/test/src/math/sqrtf_test.cpp b/libc/test/src/math/sqrtf_test.cpp
index 91f4a91633fc..c7681d01569a 100644
--- a/libc/test/src/math/sqrtf_test.cpp
+++ b/libc/test/src/math/sqrtf_test.cpp
@@ -1,65 +1,13 @@
-//===-- Unittests for sqrtf -----------------------------------------------===//
+//===-- Unittests for sqrtf -----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 
-#include "src/math/sqrtf.h"
-#include "utils/FPUtil/FPBits.h"
-#include "utils/FPUtil/TestHelpers.h"
-#include "utils/MPFRWrapper/MPFRUtils.h"
-#include <math.h>
-
-using FPBits = __llvm_libc::fputil::FPBits<float>;
-using UIntType = typename FPBits::UIntType;
-
-namespace mpfr = __llvm_libc::testing::mpfr;
-
-constexpr UIntType HiddenBit =
-    UIntType(1) << __llvm_libc::fputil::MantissaWidth<float>::value;
-
-DECLARE_SPECIAL_CONSTANTS(float)
+#include "SqrtTest.h"
 
-TEST(LlvmLibcSqrtfTest, SpecialValues) {
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrtf(aNaN));
-  ASSERT_FP_EQ(inf, __llvm_libc::sqrtf(inf));
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrtf(negInf));
-  ASSERT_FP_EQ(0.0f, __llvm_libc::sqrtf(0.0f));
-  ASSERT_FP_EQ(-0.0f, __llvm_libc::sqrtf(-0.0f));
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrtf(-1.0f));
-  ASSERT_FP_EQ(1.0f, __llvm_libc::sqrtf(1.0f));
-  ASSERT_FP_EQ(2.0f, __llvm_libc::sqrtf(4.0f));
-  ASSERT_FP_EQ(3.0f, __llvm_libc::sqrtf(9.0f));
-}
-
-TEST(LlvmLibcSqrtfTest, DenormalValues) {
-  for (UIntType mant = 1; mant < HiddenBit; mant <<= 1) {
-    FPBits denormal(0.0f);
-    denormal.encoding.mantissa = mant;
-
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, float(denormal),
-                      __llvm_libc::sqrtf(denormal), 0.5);
-  }
-
-  constexpr UIntType count = 1'000'001;
-  constexpr UIntType step = HiddenBit / count;
-  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    float x = *reinterpret_cast<float *>(&v);
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, __llvm_libc::sqrtf(x), 0.5);
-  }
-}
-
-TEST(LlvmLibcSqrtfTest, InFloatRange) {
-  constexpr UIntType count = 10'000'001;
-  constexpr UIntType step = UIntType(-1) / count;
-  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    float x = *reinterpret_cast<float *>(&v);
-    if (isnan(x) || (x < 0)) {
-      continue;
-    }
+#include "src/math/sqrtf.h"
 
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, __llvm_libc::sqrtf(x), 0.5);
-  }
-}
+LIST_SQRT_TESTS(float, __llvm_libc::sqrtf)
diff --git a/libc/test/src/math/sqrtl_test.cpp b/libc/test/src/math/sqrtl_test.cpp
index 1701bfdbc67b..c48ebb08444d 100644
--- a/libc/test/src/math/sqrtl_test.cpp
+++ b/libc/test/src/math/sqrtl_test.cpp
@@ -1,65 +1,13 @@
-//===-- Unittests for sqrtl ----------------------------------------------===//
+//===-- Unittests for sqrtl -----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 
-#include "src/math/sqrtl.h"
-#include "utils/FPUtil/FPBits.h"
-#include "utils/FPUtil/TestHelpers.h"
-#include "utils/MPFRWrapper/MPFRUtils.h"
-#include <math.h>
-
-using FPBits = __llvm_libc::fputil::FPBits<long double>;
-using UIntType = typename FPBits::UIntType;
-
-namespace mpfr = __llvm_libc::testing::mpfr;
-
-constexpr UIntType HiddenBit =
-    UIntType(1) << __llvm_libc::fputil::MantissaWidth<long double>::value;
-
-DECLARE_SPECIAL_CONSTANTS(long double)
+#include "SqrtTest.h"
 
-TEST(LlvmLibcSqrtlTest, SpecialValues) {
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrtl(aNaN));
-  ASSERT_FP_EQ(inf, __llvm_libc::sqrtl(inf));
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrtl(negInf));
-  ASSERT_FP_EQ(0.0L, __llvm_libc::sqrtl(0.0L));
-  ASSERT_FP_EQ(-0.0L, __llvm_libc::sqrtl(-0.0L));
-  ASSERT_FP_EQ(aNaN, __llvm_libc::sqrtl(-1.0L));
-  ASSERT_FP_EQ(1.0L, __llvm_libc::sqrtl(1.0L));
-  ASSERT_FP_EQ(2.0L, __llvm_libc::sqrtl(4.0L));
-  ASSERT_FP_EQ(3.0L, __llvm_libc::sqrtl(9.0L));
-}
-
-TEST(LlvmLibcSqrtlTest, DenormalValues) {
-  for (UIntType mant = 1; mant < HiddenBit; mant <<= 1) {
-    FPBits denormal(0.0L);
-    denormal.encoding.mantissa = mant;
-
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, static_cast<long double>(denormal),
-                      __llvm_libc::sqrtl(denormal), 0.5);
-  }
-
-  constexpr UIntType count = 1'000'001;
-  constexpr UIntType step = HiddenBit / count;
-  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    long double x = *reinterpret_cast<long double *>(&v);
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, __llvm_libc::sqrtl(x), 0.5);
-  }
-}
-
-TEST(LlvmLibcSqrtlTest, InLongDoubleRange) {
-  constexpr UIntType count = 10'000'001;
-  constexpr UIntType step = UIntType(-1) / count;
-  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    long double x = *reinterpret_cast<long double *>(&v);
-    if (isnan(x) || (x < 0)) {
-      continue;
-    }
+#include "src/math/sqrtl.h"
 
-    ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, __llvm_libc::sqrtl(x), 0.5);
-  }
-}
+LIST_SQRT_TESTS(long double, __llvm_libc::sqrtl)

From fb706e086c70de9dd24852f1ef1d0f411bd501a1 Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy <sivachandra@google.com>
Date: Wed, 14 Apr 2021 06:04:11 +0000
Subject: [PATCH 17/52] [libc][NFC] Make conversion from FPBits to the
 floating point type explicit.

This will help us catch errors like the ones fixed by commit
31ed45d9cfd5da2bf4f1d7ddba54122df6fc91fa.
---
 libc/test/src/math/FDimTest.h                | 14 +++++-----
 libc/test/src/math/FmaTest.h                 | 28 +++++++++++---------
 libc/test/src/math/HypotTest.h               | 14 +++++-----
 libc/test/src/math/ILogbTest.h               | 15 ++++++-----
 libc/test/src/math/LdExpTest.h               | 10 +++----
 libc/test/src/math/NextAfterTest.h           | 10 +++----
 libc/test/src/math/RIntTest.h                | 14 +++++-----
 libc/test/src/math/RemQuoTest.h              | 14 +++++-----
 libc/test/src/math/RoundToIntegerTest.h      | 18 ++++++-------
 libc/test/src/math/SqrtTest.h                |  4 +--
 libc/test/src/math/frexp_test.cpp            |  2 +-
 libc/test/src/math/frexpf_test.cpp           |  2 +-
 libc/test/src/math/frexpl_test.cpp           |  2 +-
 libc/utils/FPUtil/FPBits.h                   |  4 +--
 libc/utils/FPUtil/Hypot.h                    |  6 ++---
 libc/utils/FPUtil/ManipulationFunctions.h    | 15 ++++++-----
 libc/utils/FPUtil/NearestIntegerOperations.h |  2 +-
 libc/utils/FPUtil/NormalFloat.h              | 14 +++++-----
 libc/utils/FPUtil/TestHelpers.h              | 10 +++----
 19 files changed, 101 insertions(+), 97 deletions(-)

diff --git a/libc/test/src/math/FDimTest.h b/libc/test/src/math/FDimTest.h
index d632b2355638..1a5344006ff3 100644
--- a/libc/test/src/math/FDimTest.h
+++ b/libc/test/src/math/FDimTest.h
@@ -57,7 +57,7 @@ class FDimTestTemplate : public __llvm_libc::testing::Test {
     constexpr UIntType step = UIntType(-1) / count;
     for (UIntType i = 0, v = 0, w = UIntType(-1); i <= count;
          ++i, v += step, w -= step) {
-      T x = FPBits(v), y = FPBits(w);
+      T x = T(FPBits(v)), y = T(FPBits(w));
       if (isnan(x) || isinf(x))
         continue;
       if (isnan(y) || isinf(y))
@@ -74,9 +74,9 @@ class FDimTestTemplate : public __llvm_libc::testing::Test {
 private:
   // constexpr does not work on FPBits yet, so we cannot have these constants as
   // static.
-  const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
-  const T inf = __llvm_libc::fputil::FPBits<T>::inf();
-  const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
-  const T zero = __llvm_libc::fputil::FPBits<T>::zero();
-  const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
-};
\ No newline at end of file
+  const T nan = T(__llvm_libc::fputil::FPBits<T>::buildNaN(1));
+  const T inf = T(__llvm_libc::fputil::FPBits<T>::inf());
+  const T negInf = T(__llvm_libc::fputil::FPBits<T>::negInf());
+  const T zero = T(__llvm_libc::fputil::FPBits<T>::zero());
+  const T negZero = T(__llvm_libc::fputil::FPBits<T>::negZero());
+};
diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h
index 9f90c8627af9..eb31bf8a38de 100644
--- a/libc/test/src/math/FmaTest.h
+++ b/libc/test/src/math/FmaTest.h
@@ -23,11 +23,11 @@ class FmaTestTemplate : public __llvm_libc::testing::Test {
   using Func = T (*)(T, T, T);
   using FPBits = __llvm_libc::fputil::FPBits<T>;
   using UIntType = typename FPBits::UIntType;
-  const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
-  const T inf = __llvm_libc::fputil::FPBits<T>::inf();
-  const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
-  const T zero = __llvm_libc::fputil::FPBits<T>::zero();
-  const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
+  const T nan = T(__llvm_libc::fputil::FPBits<T>::buildNaN(1));
+  const T inf = T(__llvm_libc::fputil::FPBits<T>::inf());
+  const T negInf = T(__llvm_libc::fputil::FPBits<T>::negInf());
+  const T zero = T(__llvm_libc::fputil::FPBits<T>::zero());
+  const T negZero = T(__llvm_libc::fputil::FPBits<T>::negZero());
 
   UIntType getRandomBitPattern() {
     UIntType bits{0};
@@ -50,16 +50,16 @@ class FmaTestTemplate : public __llvm_libc::testing::Test {
     EXPECT_FP_EQ(func(inf, negInf, nan), nan);
 
     // Test underflow rounding up.
-    EXPECT_FP_EQ(func(T(0.5), FPBits(FPBits::minSubnormal),
-                      FPBits(FPBits::minSubnormal)),
-                 FPBits(UIntType(2)));
+    EXPECT_FP_EQ(func(T(0.5), T(FPBits(FPBits::minSubnormal)),
+                      T(FPBits(FPBits::minSubnormal))),
+                 T(FPBits(UIntType(2))));
     // Test underflow rounding down.
-    FPBits v(FPBits::minNormal + UIntType(1));
+    T v = T(FPBits(FPBits::minNormal + UIntType(1)));
     EXPECT_FP_EQ(
-        func(T(1) / T(FPBits::minNormal << 1), v, FPBits(FPBits::minNormal)),
+        func(T(1) / T(FPBits::minNormal << 1), v, T(FPBits(FPBits::minNormal))),
         v);
     // Test overflow.
-    FPBits z(FPBits::maxNormal);
+    T z = T(FPBits(FPBits::maxNormal));
     EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z);
   }
 
@@ -70,7 +70,8 @@ class FmaTestTemplate : public __llvm_libc::testing::Test {
     for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal;
          v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal;
          v += step, w -= step) {
-      T x = FPBits(getRandomBitPattern()), y = FPBits(v), z = FPBits(w);
+      T x = T(FPBits(getRandomBitPattern())), y = T(FPBits(v)),
+        z = T(FPBits(w));
       T result = func(x, y, z);
       mpfr::TernaryInput<T> input{x, y, z};
       ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5);
@@ -83,7 +84,8 @@ class FmaTestTemplate : public __llvm_libc::testing::Test {
     for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal;
          v <= FPBits::maxNormal && w >= FPBits::minNormal;
          v += step, w -= step) {
-      T x = FPBits(v), y = FPBits(w), z = FPBits(getRandomBitPattern());
+      T x = T(FPBits(v)), y = T(FPBits(w)),
+        z = T(FPBits(getRandomBitPattern()));
       T result = func(x, y, z);
       mpfr::TernaryInput<T> input{x, y, z};
       ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5);
diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h
index 34b1ff6a08b6..697d60441288 100644
--- a/libc/test/src/math/HypotTest.h
+++ b/libc/test/src/math/HypotTest.h
@@ -25,11 +25,11 @@ class HypotTestTemplate : public __llvm_libc::testing::Test {
   using Func = T (*)(T, T);
   using FPBits = __llvm_libc::fputil::FPBits<T>;
   using UIntType = typename FPBits::UIntType;
-  const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
-  const T inf = __llvm_libc::fputil::FPBits<T>::inf();
-  const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
-  const T zero = __llvm_libc::fputil::FPBits<T>::zero();
-  const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
+  const T nan = T(__llvm_libc::fputil::FPBits<T>::buildNaN(1));
+  const T inf = T(__llvm_libc::fputil::FPBits<T>::inf());
+  const T negInf = T(__llvm_libc::fputil::FPBits<T>::negInf());
+  const T zero = T(__llvm_libc::fputil::FPBits<T>::zero());
+  const T negZero = T(__llvm_libc::fputil::FPBits<T>::negZero());
 
 public:
   void testSpecialNumbers(Func func) {
@@ -52,7 +52,7 @@ class HypotTestTemplate : public __llvm_libc::testing::Test {
     for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal;
          v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal;
          v += step, w -= step) {
-      T x = FPBits(v), y = FPBits(w);
+      T x = T(FPBits(v)), y = T(FPBits(w));
       T result = func(x, y);
       mpfr::BinaryInput<T> input{x, y};
       ASSERT_MPFR_MATCH(mpfr::Operation::Hypot, input, result, 0.5);
@@ -65,7 +65,7 @@ class HypotTestTemplate : public __llvm_libc::testing::Test {
     for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal;
          v <= FPBits::maxNormal && w >= FPBits::minNormal;
          v += step, w -= step) {
-      T x = FPBits(v), y = FPBits(w);
+      T x = T(FPBits(v)), y = T(FPBits(w));
       T result = func(x, y);
       mpfr::BinaryInput<T> input{x, y};
       ASSERT_MPFR_MATCH(mpfr::Operation::Hypot, input, result, 0.5);
diff --git a/libc/test/src/math/ILogbTest.h b/libc/test/src/math/ILogbTest.h
index 724cc9571670..bf7996075dde 100644
--- a/libc/test/src/math/ILogbTest.h
+++ b/libc/test/src/math/ILogbTest.h
@@ -22,13 +22,14 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test {
 
   template <typename T>
   void testSpecialNumbers(typename ILogbFunc<T>::Func func) {
-    EXPECT_EQ(FP_ILOGB0, func(__llvm_libc::fputil::FPBits<T>::zero()));
-    EXPECT_EQ(FP_ILOGB0, func(__llvm_libc::fputil::FPBits<T>::negZero()));
+    EXPECT_EQ(FP_ILOGB0, func(T(__llvm_libc::fputil::FPBits<T>::zero())));
+    EXPECT_EQ(FP_ILOGB0, func(T(__llvm_libc::fputil::FPBits<T>::negZero())));
 
-    EXPECT_EQ(FP_ILOGBNAN, func(__llvm_libc::fputil::FPBits<T>::buildNaN(1)));
+    EXPECT_EQ(FP_ILOGBNAN,
+              func(T(__llvm_libc::fputil::FPBits<T>::buildNaN(1))));
 
-    EXPECT_EQ(INT_MAX, func(__llvm_libc::fputil::FPBits<T>::inf()));
-    EXPECT_EQ(INT_MAX, func(__llvm_libc::fputil::FPBits<T>::negInf()));
+    EXPECT_EQ(INT_MAX, func(T(__llvm_libc::fputil::FPBits<T>::inf())));
+    EXPECT_EQ(INT_MAX, func(T(__llvm_libc::fputil::FPBits<T>::negInf())));
   }
 
   template <typename T> void testPowersOfTwo(typename ILogbFunc<T>::Func func) {
@@ -78,7 +79,7 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test {
         (FPBits::maxSubnormal - FPBits::minSubnormal) / count;
     for (UIntType v = FPBits::minSubnormal; v <= FPBits::maxSubnormal;
          v += step) {
-      T x = FPBits(v);
+      T x = T(FPBits(v));
       if (isnan(x) || isinf(x) || x == 0.0)
         continue;
 
@@ -94,7 +95,7 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test {
     constexpr UIntType count = 1000001;
     constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count;
     for (UIntType v = FPBits::minNormal; v <= FPBits::maxNormal; v += step) {
-      T x = FPBits(v);
+      T x = T(FPBits(v));
       if (isnan(x) || isinf(x) || x == 0.0)
         continue;
 
diff --git a/libc/test/src/math/LdExpTest.h b/libc/test/src/math/LdExpTest.h
index a2c1b54b1822..046ec04f130d 100644
--- a/libc/test/src/math/LdExpTest.h
+++ b/libc/test/src/math/LdExpTest.h
@@ -28,11 +28,11 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test {
   // A normalized mantissa to be used with tests.
   static constexpr UIntType mantissa = NormalFloat::one + 0x1234;
 
-  const T zero = __llvm_libc::fputil::FPBits<T>::zero();
-  const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
-  const T inf = __llvm_libc::fputil::FPBits<T>::inf();
-  const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
-  const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
+  const T zero = T(__llvm_libc::fputil::FPBits<T>::zero());
+  const T negZero = T(__llvm_libc::fputil::FPBits<T>::negZero());
+  const T inf = T(__llvm_libc::fputil::FPBits<T>::inf());
+  const T negInf = T(__llvm_libc::fputil::FPBits<T>::negInf());
+  const T nan = T(__llvm_libc::fputil::FPBits<T>::buildNaN(1));
 
 public:
   typedef T (*LdExpFunc)(T, int);
diff --git a/libc/test/src/math/NextAfterTest.h b/libc/test/src/math/NextAfterTest.h
index 8ba3f3096a55..9c53ab719718 100644
--- a/libc/test/src/math/NextAfterTest.h
+++ b/libc/test/src/math/NextAfterTest.h
@@ -29,11 +29,11 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test {
   static constexpr int bitWidthOfType = sizeof(T) * 8;
 #endif
 
-  const T zero = FPBits::zero();
-  const T negZero = FPBits::negZero();
-  const T inf = FPBits::inf();
-  const T negInf = FPBits::negInf();
-  const T nan = FPBits::buildNaN(1);
+  const T zero = T(FPBits::zero());
+  const T negZero = T(FPBits::negZero());
+  const T inf = T(FPBits::inf());
+  const T negInf = T(FPBits::negInf());
+  const T nan = T(FPBits::buildNaN(1));
   const UIntType minSubnormal = FPBits::minSubnormal;
   const UIntType maxSubnormal = FPBits::maxSubnormal;
   const UIntType minNormal = FPBits::minNormal;
diff --git a/libc/test/src/math/RIntTest.h b/libc/test/src/math/RIntTest.h
index 18331ecdad08..edcc27993299 100644
--- a/libc/test/src/math/RIntTest.h
+++ b/libc/test/src/math/RIntTest.h
@@ -33,11 +33,11 @@ class RIntTestTemplate : public __llvm_libc::testing::Test {
   using FPBits = __llvm_libc::fputil::FPBits<T>;
   using UIntType = typename FPBits::UIntType;
 
-  const T zero = FPBits::zero();
-  const T negZero = FPBits::negZero();
-  const T inf = FPBits::inf();
-  const T negInf = FPBits::negInf();
-  const T nan = FPBits::buildNaN(1);
+  const T zero = T(FPBits::zero());
+  const T negZero = T(FPBits::negZero());
+  const T inf = T(FPBits::inf());
+  const T negInf = T(FPBits::negInf());
+  const T nan = T(FPBits::buildNaN(1));
 
   static inline mpfr::RoundingMode toMPFRRoundingMode(int mode) {
     switch (mode) {
@@ -98,7 +98,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test {
         (FPBits::maxSubnormal - FPBits::minSubnormal) / count;
     for (UIntType i = FPBits::minSubnormal; i <= FPBits::maxSubnormal;
          i += step) {
-      T x = FPBits(i);
+      T x = T(FPBits(i));
       for (int mode : roundingModes) {
         __llvm_libc::fputil::setRound(mode);
         mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode);
@@ -111,7 +111,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test {
     constexpr UIntType count = 1000001;
     constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count;
     for (UIntType i = FPBits::minNormal; i <= FPBits::maxNormal; i += step) {
-      T x = FPBits(i);
+      T x = T(FPBits(i));
       // In normal range on x86 platforms, the long double implicit 1 bit can be
       // zero making the numbers NaN. We will skip them.
       if (isnan(x)) {
diff --git a/libc/test/src/math/RemQuoTest.h b/libc/test/src/math/RemQuoTest.h
index f643079b89ac..7b442ffb387e 100644
--- a/libc/test/src/math/RemQuoTest.h
+++ b/libc/test/src/math/RemQuoTest.h
@@ -23,11 +23,11 @@ class RemQuoTestTemplate : public __llvm_libc::testing::Test {
   using FPBits = __llvm_libc::fputil::FPBits<T>;
   using UIntType = typename FPBits::UIntType;
 
-  const T zero = __llvm_libc::fputil::FPBits<T>::zero();
-  const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
-  const T inf = __llvm_libc::fputil::FPBits<T>::inf();
-  const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
-  const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
+  const T zero = T(__llvm_libc::fputil::FPBits<T>::zero());
+  const T negZero = T(__llvm_libc::fputil::FPBits<T>::negZero());
+  const T inf = T(__llvm_libc::fputil::FPBits<T>::inf());
+  const T negInf = T(__llvm_libc::fputil::FPBits<T>::negInf());
+  const T nan = T(__llvm_libc::fputil::FPBits<T>::buildNaN(1));
 
 public:
   typedef T (*RemQuoFunc)(T, T, int *);
@@ -101,7 +101,7 @@ class RemQuoTestTemplate : public __llvm_libc::testing::Test {
     for (UIntType v = FPBits::minSubnormal, w = FPBits::maxSubnormal;
          v <= FPBits::maxSubnormal && w >= FPBits::minSubnormal;
          v += step, w -= step) {
-      T x = FPBits(v), y = FPBits(w);
+      T x = T(FPBits(v)), y = T(FPBits(w));
       mpfr::BinaryOutput<T> result;
       mpfr::BinaryInput<T> input{x, y};
       result.f = func(x, y, &result.i);
@@ -115,7 +115,7 @@ class RemQuoTestTemplate : public __llvm_libc::testing::Test {
     for (UIntType v = FPBits::minNormal, w = FPBits::maxNormal;
          v <= FPBits::maxNormal && w >= FPBits::minNormal;
          v += step, w -= step) {
-      T x = FPBits(v), y = FPBits(w);
+      T x = T(FPBits(v)), y = T(FPBits(w));
       mpfr::BinaryOutput<T> result;
       mpfr::BinaryInput<T> input{x, y};
       result.f = func(x, y, &result.i);
diff --git a/libc/test/src/math/RoundToIntegerTest.h b/libc/test/src/math/RoundToIntegerTest.h
index 0b83b9a1ed5c..c3d035195394 100644
--- a/libc/test/src/math/RoundToIntegerTest.h
+++ b/libc/test/src/math/RoundToIntegerTest.h
@@ -35,11 +35,11 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test {
   using FPBits = __llvm_libc::fputil::FPBits<F>;
   using UIntType = typename FPBits::UIntType;
 
-  const F zero = __llvm_libc::fputil::FPBits<F>::zero();
-  const F negZero = __llvm_libc::fputil::FPBits<F>::negZero();
-  const F inf = __llvm_libc::fputil::FPBits<F>::inf();
-  const F negInf = __llvm_libc::fputil::FPBits<F>::negInf();
-  const F nan = __llvm_libc::fputil::FPBits<F>::buildNaN(1);
+  const F zero = F(__llvm_libc::fputil::FPBits<F>::zero());
+  const F negZero = F(__llvm_libc::fputil::FPBits<F>::negZero());
+  const F inf = F(__llvm_libc::fputil::FPBits<F>::inf());
+  const F negInf = F(__llvm_libc::fputil::FPBits<F>::negInf());
+  const F nan = F(__llvm_libc::fputil::FPBits<F>::buildNaN(1));
   static constexpr I IntegerMin = I(1) << (sizeof(I) * 8 - 1);
   static constexpr I IntegerMax = -(IntegerMin + 1);
 
@@ -139,7 +139,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test {
     bits.encoding.sign = 1;
     bits.encoding.mantissa = 0;
 
-    F x = bits;
+    F x = F(bits);
     long mpfrResult;
     bool erangeflag = mpfr::RoundToLong(x, mpfrResult);
     ASSERT_FALSE(erangeflag);
@@ -204,7 +204,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test {
     bits.encoding.mantissa =
         UIntType(0x1) << (__llvm_libc::fputil::MantissaWidth<F>::value - 1);
 
-    F x = bits;
+    F x = F(bits);
     if (TestModes) {
       for (int m : roundingModes) {
         __llvm_libc::fputil::setRound(m);
@@ -228,7 +228,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test {
         (FPBits::maxSubnormal - FPBits::minSubnormal) / count;
     for (UIntType i = FPBits::minSubnormal; i <= FPBits::maxSubnormal;
          i += step) {
-      F x = FPBits(i);
+      F x = F(FPBits(i));
       if (x == F(0.0))
         continue;
       // All subnormal numbers should round to zero.
@@ -270,7 +270,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test {
     constexpr UIntType count = 1000001;
     constexpr UIntType step = (FPBits::maxNormal - FPBits::minNormal) / count;
     for (UIntType i = FPBits::minNormal; i <= FPBits::maxNormal; i += step) {
-      F x = FPBits(i);
+      F x = F(FPBits(i));
       // In normal range on x86 platforms, the long double implicit 1 bit can be
       // zero making the numbers NaN. We will skip them.
       if (isnan(x)) {
diff --git a/libc/test/src/math/SqrtTest.h b/libc/test/src/math/SqrtTest.h
index 56916a501329..56a16bb09754 100644
--- a/libc/test/src/math/SqrtTest.h
+++ b/libc/test/src/math/SqrtTest.h
@@ -41,8 +41,8 @@ template <typename T> class SqrtTest : public __llvm_libc::testing::Test {
       FPBits denormal(T(0.0));
       denormal.encoding.mantissa = mant;
 
-      ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, T(denormal),
-                        func(denormal), T(0.5));
+      ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, T(denormal), func(T(denormal)),
+                        T(0.5));
     }
 
     constexpr UIntType count = 1'000'001;
diff --git a/libc/test/src/math/frexp_test.cpp b/libc/test/src/math/frexp_test.cpp
index d9fcae4042bb..2d8ae1c9ca1a 100644
--- a/libc/test/src/math/frexp_test.cpp
+++ b/libc/test/src/math/frexp_test.cpp
@@ -136,7 +136,7 @@ TEST(LlvmLibcFrexpTest, InDoubleRange) {
   constexpr UIntType count = 1000001;
   constexpr UIntType step = UIntType(-1) / count;
   for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    double x = FPBits(v);
+    double x = double(FPBits(v));
     if (isnan(x) || isinf(x) || x == 0.0)
       continue;
 
diff --git a/libc/test/src/math/frexpf_test.cpp b/libc/test/src/math/frexpf_test.cpp
index 8d2fe307855d..a3a3da4530b8 100644
--- a/libc/test/src/math/frexpf_test.cpp
+++ b/libc/test/src/math/frexpf_test.cpp
@@ -143,7 +143,7 @@ TEST(LlvmLibcFrexpfTest, InFloatRange) {
   constexpr UIntType count = 1000001;
   constexpr UIntType step = UIntType(-1) / count;
   for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    float x = FPBits(v);
+    float x = float(FPBits(v));
     if (isnan(x) || isinf(x) || x == 0.0)
       continue;
 
diff --git a/libc/test/src/math/frexpl_test.cpp b/libc/test/src/math/frexpl_test.cpp
index ee18961e638b..6036178a3192 100644
--- a/libc/test/src/math/frexpl_test.cpp
+++ b/libc/test/src/math/frexpl_test.cpp
@@ -93,7 +93,7 @@ TEST(LlvmLibcFrexplTest, LongDoubleRange) {
   constexpr UIntType count = 10000000;
   constexpr UIntType step = UIntType(-1) / count;
   for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    long double x = FPBits(v);
+    long double x = static_cast<long double>(FPBits(v));
     if (isnan(x) || isinf(x) || x == 0.0l)
       continue;
 
diff --git a/libc/utils/FPUtil/FPBits.h b/libc/utils/FPUtil/FPBits.h
index b2c1e578125f..bc69829ca720 100644
--- a/libc/utils/FPUtil/FPBits.h
+++ b/libc/utils/FPUtil/FPBits.h
@@ -102,7 +102,7 @@ template <typename T> union FPBits {
 
   FPBits() : integer(0) {}
 
-  operator T() { return val; }
+  explicit operator T() { return val; }
 
   UIntType uintval() const { return integer; }
 
@@ -143,7 +143,7 @@ template <typename T> union FPBits {
   static T buildNaN(UIntType v) {
     FPBits<T> bits = inf();
     bits.encoding.mantissa = v;
-    return bits;
+    return T(bits);
   }
 };
 
diff --git a/libc/utils/FPUtil/Hypot.h b/libc/utils/FPUtil/Hypot.h
index 3585304b8508..adbd9f531db5 100644
--- a/libc/utils/FPUtil/Hypot.h
+++ b/libc/utils/FPUtil/Hypot.h
@@ -125,7 +125,7 @@ static inline T hypot(T x, T y) {
   FPBits_t x_bits(x), y_bits(y);
 
   if (x_bits.isInf() || y_bits.isInf()) {
-    return FPBits_t::inf();
+    return T(FPBits_t::inf());
   }
   if (x_bits.isNaN()) {
     return x;
@@ -208,7 +208,7 @@ static inline T hypot(T x, T y) {
       sum >>= 2;
       ++out_exp;
       if (out_exp >= FPBits_t::maxExponent) {
-        return FPBits_t::inf();
+        return T(FPBits_t::inf());
       }
     } else {
       // For denormal result, we simply move the leading bit of the result to
@@ -254,7 +254,7 @@ static inline T hypot(T x, T y) {
     Y -= one >> 1;
     ++out_exp;
     if (out_exp >= FPBits_t::maxExponent) {
-      return FPBits_t::inf();
+      return T(FPBits_t::inf());
     }
   }
 
diff --git a/libc/utils/FPUtil/ManipulationFunctions.h b/libc/utils/FPUtil/ManipulationFunctions.h
index f0e5c8faa0a4..9bd54ec3e979 100644
--- a/libc/utils/FPUtil/ManipulationFunctions.h
+++ b/libc/utils/FPUtil/ManipulationFunctions.h
@@ -47,13 +47,14 @@ static inline T modf(T x, T &iptr) {
     return x;
   } else if (bits.isInf()) {
     iptr = x;
-    return bits.encoding.sign ? FPBits<T>::negZero() : FPBits<T>::zero();
+    return bits.encoding.sign ? T(FPBits<T>::negZero()) : T(FPBits<T>::zero());
   } else {
     iptr = trunc(x);
     if (x == iptr) {
       // If x is already an integer value, then return zero with the right
       // sign.
-      return bits.encoding.sign ? FPBits<T>::negZero() : FPBits<T>::zero();
+      return bits.encoding.sign ? T(FPBits<T>::negZero())
+                                : T(FPBits<T>::zero());
     } else {
       return x - iptr;
     }
@@ -65,7 +66,7 @@ template <typename T,
 static inline T copysign(T x, T y) {
   FPBits<T> xbits(x);
   xbits.encoding.sign = FPBits<T>(y).encoding.sign;
-  return xbits;
+  return T(xbits);
 }
 
 template <typename T,
@@ -104,12 +105,12 @@ static inline T logb(T x) {
   if (bits.isZero()) {
     // TODO(Floating point exception): Raise div-by-zero exception.
     // TODO(errno): POSIX requires setting errno to ERANGE.
-    return FPBits<T>::negInf();
+    return T(FPBits<T>::negInf());
   } else if (bits.isNaN()) {
     return x;
   } else if (bits.isInf()) {
     // Return positive infinity.
-    return FPBits<T>::inf();
+    return T(FPBits<T>::inf());
   }
 
   NormalFloat<T> normal(bits);
@@ -131,11 +132,11 @@ static inline T ldexp(T x, int exp) {
   // calculating the limit.
   int expLimit = FPBits<T>::maxExponent + MantissaWidth<T>::value + 1;
   if (exp > expLimit)
-    return bits.encoding.sign ? FPBits<T>::negInf() : FPBits<T>::inf();
+    return bits.encoding.sign ? T(FPBits<T>::negInf()) : T(FPBits<T>::inf());
 
   // Similarly on the negative side we return zero early if |exp| is too small.
   if (exp < -expLimit)
-    return bits.encoding.sign ? FPBits<T>::negZero() : FPBits<T>::zero();
+    return bits.encoding.sign ? T(FPBits<T>::negZero()) : T(FPBits<T>::zero());
 
   // For all other values, NormalFloat to T conversion handles it the right way.
   NormalFloat<T> normal(bits);
diff --git a/libc/utils/FPUtil/NearestIntegerOperations.h b/libc/utils/FPUtil/NearestIntegerOperations.h
index 7bb79bee377f..5ea4b41ccffb 100644
--- a/libc/utils/FPUtil/NearestIntegerOperations.h
+++ b/libc/utils/FPUtil/NearestIntegerOperations.h
@@ -51,7 +51,7 @@ static inline T trunc(T x) {
 
   int trimSize = MantissaWidth<T>::value - exponent;
   bits.encoding.mantissa = (bits.encoding.mantissa >> trimSize) << trimSize;
-  return bits;
+  return T(bits);
 }
 
 template <typename T,
diff --git a/libc/utils/FPUtil/NormalFloat.h b/libc/utils/FPUtil/NormalFloat.h
index 79e3d3448f52..07bb91bc4fd9 100644
--- a/libc/utils/FPUtil/NormalFloat.h
+++ b/libc/utils/FPUtil/NormalFloat.h
@@ -93,7 +93,7 @@ template <typename T> struct NormalFloat {
     // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
     constexpr int maxExponentValue = (1 << ExponentWidth<T>::value) - 2;
     if (biasedExponent > maxExponentValue) {
-      return sign ? FPBits<T>::negInf() : FPBits<T>::inf();
+      return sign ? T(FPBits<T>::negInf()) : T(FPBits<T>::inf());
     }
 
     FPBits<T> result(T(0.0));
@@ -126,15 +126,15 @@ template <typename T> struct NormalFloat {
         // the overflow into the exponent.
         if (newMantissa == one)
           result.encoding.exponent = 1;
-        return result;
+        return T(result);
       } else {
-        return result;
+        return T(result);
       }
     }
 
     result.encoding.exponent = exponent + FPBits<T>::exponentBias;
     result.encoding.mantissa = mantissa;
-    return result;
+    return T(result);
   }
 
 private:
@@ -245,16 +245,16 @@ template <> inline NormalFloat<long double>::operator long double() const {
       } else {
         result.encoding.implicitBit = 0;
       }
-      return result;
+      return static_cast<long double>(result);
     } else {
-      return result;
+      return static_cast<long double>(result);
     }
   }
 
   result.encoding.exponent = biasedExponent;
   result.encoding.mantissa = mantissa;
   result.encoding.implicitBit = 1;
-  return result;
+  return static_cast<long double>(result);
 }
 #endif
 
diff --git a/libc/utils/FPUtil/TestHelpers.h b/libc/utils/FPUtil/TestHelpers.h
index 6ad6d3f13af9..263eace786fd 100644
--- a/libc/utils/FPUtil/TestHelpers.h
+++ b/libc/utils/FPUtil/TestHelpers.h
@@ -68,11 +68,11 @@ FPMatcher<T, C> getMatcher(T expectedValue) {
 #define DECLARE_SPECIAL_CONSTANTS(T)                                           \
   using FPBits = __llvm_libc::fputil::FPBits<T>;                               \
   using UIntType = typename FPBits::UIntType;                                  \
-  const T zero = FPBits::zero();                                               \
-  const T negZero = FPBits::negZero();                                         \
-  const T aNaN = FPBits::buildNaN(1);                                          \
-  const T inf = FPBits::inf();                                                 \
-  const T negInf = FPBits::negInf();
+  const T zero = T(FPBits::zero());                                            \
+  const T negZero = T(FPBits::negZero());                                      \
+  const T aNaN = T(FPBits::buildNaN(1));                                       \
+  const T inf = T(FPBits::inf());                                              \
+  const T negInf = T(FPBits::negInf());
 
 #define EXPECT_FP_EQ(expected, actual)                                         \
   EXPECT_THAT(                                                                 \

From bb8aa2ad1ae7050b82a76e38affd1294772b213b Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy <sivachandra@google.com>
Date: Sat, 17 Apr 2021 05:41:20 +0000
Subject: [PATCH 18/52] [libc][NFC] Use explicit conversion in modfl_test.

---
 libc/test/src/math/modfl_test.cpp | 36 +++++++++++++++++--------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/libc/test/src/math/modfl_test.cpp b/libc/test/src/math/modfl_test.cpp
index e8f292abddbe..fae4690b65cc 100644
--- a/libc/test/src/math/modfl_test.cpp
+++ b/libc/test/src/math/modfl_test.cpp
@@ -13,48 +13,52 @@
 #include "utils/UnitTest/Test.h"
 #include <math.h>
 
+typedef long double LD;
 using FPBits = __llvm_libc::fputil::FPBits<long double>;
 
 TEST(LlvmLibcmodflTest, SpecialNumbers) {
   long double integral;
 
-  EXPECT_TRUE(FPBits::zero() == __llvm_libc::modfl(FPBits::inf(), &integral));
-  EXPECT_TRUE(FPBits::inf() == integral);
+  EXPECT_TRUE(LD(FPBits::zero()) ==
+              __llvm_libc::modfl(LD(FPBits::inf()), &integral));
+  EXPECT_TRUE(LD(FPBits::inf()) == integral);
 
-  EXPECT_TRUE(FPBits::negZero() ==
-              __llvm_libc::modfl(FPBits::negInf(), &integral));
-  EXPECT_TRUE(FPBits::negInf() == integral);
+  EXPECT_TRUE(LD(FPBits::negZero()) ==
+              __llvm_libc::modfl(LD(FPBits::negInf()), &integral));
+  EXPECT_TRUE(LD(FPBits::negInf()) == integral);
 
-  EXPECT_TRUE(FPBits::zero() == __llvm_libc::modfl(FPBits::zero(), &integral));
+  EXPECT_TRUE(LD(FPBits::zero()) ==
+              __llvm_libc::modfl(LD(FPBits::zero()), &integral));
   EXPECT_TRUE(integral == 0.0l);
 
-  EXPECT_TRUE(FPBits::negZero() ==
-              __llvm_libc::modfl(FPBits::negZero(), &integral));
+  EXPECT_TRUE(LD(FPBits::negZero()) ==
+              __llvm_libc::modfl(LD(FPBits::negZero()), &integral));
   EXPECT_TRUE(integral == 0.0l);
 
   EXPECT_TRUE(
-      FPBits(__llvm_libc::modfl(FPBits::buildNaN(1), &integral)).isNaN());
+      FPBits(__llvm_libc::modfl(LD(FPBits::buildNaN(1)), &integral)).isNaN());
 }
 
 TEST(LlvmLibcmodflTest, Integers) {
   long double integral;
 
-  EXPECT_TRUE(FPBits::zero() == __llvm_libc::modfl(1.0l, &integral));
+  EXPECT_TRUE(LD(FPBits::zero()) == __llvm_libc::modfl(1.0l, &integral));
   EXPECT_TRUE(integral == 1.0l);
 
-  EXPECT_TRUE(FPBits::negZero() == __llvm_libc::modfl(-1.0l, &integral));
+  EXPECT_TRUE(LD(FPBits::negZero()) == __llvm_libc::modfl(-1.0l, &integral));
   EXPECT_TRUE(integral == -1.0l);
 
-  EXPECT_TRUE(FPBits::zero() == __llvm_libc::modfl(10.0l, &integral));
+  EXPECT_TRUE(LD(FPBits::zero()) == __llvm_libc::modfl(10.0l, &integral));
   EXPECT_TRUE(integral == 10.0l);
 
-  EXPECT_TRUE(FPBits::negZero() == __llvm_libc::modfl(-10.0l, &integral));
+  EXPECT_TRUE(LD(FPBits::negZero()) == __llvm_libc::modfl(-10.0l, &integral));
   EXPECT_TRUE(integral == -10.0l);
 
-  EXPECT_TRUE(FPBits::zero() == __llvm_libc::modfl(12345.0l, &integral));
+  EXPECT_TRUE(LD(FPBits::zero()) == __llvm_libc::modfl(12345.0l, &integral));
   EXPECT_TRUE(integral == 12345.0l);
 
-  EXPECT_TRUE(FPBits::negZero() == __llvm_libc::modfl(-12345.0l, &integral));
+  EXPECT_TRUE(LD(FPBits::negZero()) ==
+              __llvm_libc::modfl(-12345.0l, &integral));
   EXPECT_TRUE(integral == -12345.0l);
 }
 
@@ -85,7 +89,7 @@ TEST(LlvmLibcModflTest, LongDoubleRange) {
   constexpr UIntType count = 10000000;
   constexpr UIntType step = UIntType(-1) / count;
   for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    long double x = FPBits(v);
+    long double x = LD(FPBits(v));
     if (isnan(x) || isinf(x) || x == 0.0l)
       continue;
 

From 7db1102a101d48b7e77092c409e3aee1a369b39a Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy <sivachandra@google.com>
Date: Sat, 17 Apr 2021 05:49:23 +0000
Subject: [PATCH 19/52] [libc][NFC] Use explicit conversion in frexpl_test and
 logbl_test.

---
 libc/test/src/math/frexpl_test.cpp | 17 ++++++++++-------
 libc/test/src/math/logbl_test.cpp  | 14 ++++++++------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/libc/test/src/math/frexpl_test.cpp b/libc/test/src/math/frexpl_test.cpp
index 6036178a3192..053782b0ba10 100644
--- a/libc/test/src/math/frexpl_test.cpp
+++ b/libc/test/src/math/frexpl_test.cpp
@@ -13,6 +13,7 @@
 #include "utils/UnitTest/Test.h"
 #include <math.h>
 
+typedef long double LD;
 using FPBits = __llvm_libc::fputil::FPBits<long double>;
 
 namespace mpfr = __llvm_libc::testing::mpfr;
@@ -20,19 +21,21 @@ namespace mpfr = __llvm_libc::testing::mpfr;
 TEST(LlvmLibcFrexplTest, SpecialNumbers) {
   int exponent;
 
-  EXPECT_TRUE(FPBits::inf() == __llvm_libc::frexpl(FPBits::inf(), &exponent));
-  EXPECT_TRUE(FPBits::negInf() ==
-              __llvm_libc::frexpl(FPBits::negInf(), &exponent));
+  EXPECT_TRUE(LD(FPBits::inf()) ==
+              __llvm_libc::frexpl(LD(FPBits::inf()), &exponent));
+  EXPECT_TRUE(LD(FPBits::negInf()) ==
+              __llvm_libc::frexpl(LD(FPBits::negInf()), &exponent));
 
-  EXPECT_TRUE(FPBits::zero() == __llvm_libc::frexpl(FPBits::zero(), &exponent));
+  EXPECT_TRUE(LD(FPBits::zero()) ==
+              __llvm_libc::frexpl(LD(FPBits::zero()), &exponent));
   EXPECT_EQ(exponent, 0);
 
-  EXPECT_TRUE(FPBits::negZero() ==
-              __llvm_libc::frexpl(FPBits::negZero(), &exponent));
+  EXPECT_TRUE(LD(FPBits::negZero()) ==
+              __llvm_libc::frexpl(LD(FPBits::negZero()), &exponent));
   EXPECT_EQ(exponent, 0);
 
   EXPECT_TRUE(
-      FPBits(__llvm_libc::frexpl(FPBits::buildNaN(1), &exponent)).isNaN());
+      FPBits(__llvm_libc::frexpl(LD(FPBits::buildNaN(1)), &exponent)).isNaN());
 }
 
 TEST(LlvmLibcFrexplTest, PowersOfTwo) {
diff --git a/libc/test/src/math/logbl_test.cpp b/libc/test/src/math/logbl_test.cpp
index f0d5afe328c6..41a724d95b32 100644
--- a/libc/test/src/math/logbl_test.cpp
+++ b/libc/test/src/math/logbl_test.cpp
@@ -12,16 +12,18 @@
 #include "utils/UnitTest/Test.h"
 #include <math.h>
 
+typedef long double LD;
 using FPBits = __llvm_libc::fputil::FPBits<long double>;
 
 TEST(LlvmLibclogblTest, SpecialNumbers) {
-  EXPECT_TRUE(FPBits::inf() == __llvm_libc::logbl(FPBits::inf()));
-  EXPECT_TRUE(FPBits::inf() == __llvm_libc::logbl(FPBits::negInf()));
+  EXPECT_TRUE(LD(FPBits::inf()) == __llvm_libc::logbl(LD(FPBits::inf())));
+  EXPECT_TRUE(LD(FPBits::inf()) == __llvm_libc::logbl(LD(FPBits::negInf())));
 
-  EXPECT_TRUE(FPBits::negInf() == __llvm_libc::logbl(FPBits::zero()));
-  EXPECT_TRUE(FPBits::negInf() == __llvm_libc::logbl(FPBits::negZero()));
+  EXPECT_TRUE(LD(FPBits::negInf()) == __llvm_libc::logbl(LD(FPBits::zero())));
+  EXPECT_TRUE(LD(FPBits::negInf()) ==
+              __llvm_libc::logbl(LD(FPBits::negZero())));
 
-  EXPECT_TRUE(FPBits(__llvm_libc::logbl(FPBits::buildNaN(1))).isNaN());
+  EXPECT_TRUE(FPBits(__llvm_libc::logbl(LD(FPBits::buildNaN(1)))).isNaN());
 }
 
 TEST(LlvmLibclogblTest, PowersOfTwo) {
@@ -66,7 +68,7 @@ TEST(LlvmLibcLogblTest, LongDoubleRange) {
   constexpr UIntType count = 10000000;
   constexpr UIntType step = UIntType(-1) / count;
   for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
-    long double x = FPBits(v);
+    long double x = LD(FPBits(v));
     if (isnan(x) || isinf(x) || x == 0.0l)
       continue;
 

From bbba69425c6131283163af99201577c296aa3877 Mon Sep 17 00:00:00 2001
From: Siva Chandra <sivachandra@google.com>
Date: Fri, 16 Apr 2021 22:29:52 -0700
Subject: [PATCH 20/52] [libc][NFC] Use explicit conversion in aarch64 FEnv.

---
 libc/utils/FPUtil/aarch64/FEnv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/utils/FPUtil/aarch64/FEnv.h b/libc/utils/FPUtil/aarch64/FEnv.h
index 44723ace3f3e..327ce0726568 100644
--- a/libc/utils/FPUtil/aarch64/FEnv.h
+++ b/libc/utils/FPUtil/aarch64/FEnv.h
@@ -115,8 +115,8 @@ static inline int setExcept(int excepts) {
 static inline int raiseExcept(int excepts) {
   float zero = 0.0f;
   float one = 1.0f;
-  float largeValue = FPBits<float>(FPBits<float>::maxNormal);
-  float smallValue = FPBits<float>(FPBits<float>::minNormal);
+  float largeValue = float(FPBits<float>(FPBits<float>::maxNormal));
+  float smallValue = float(FPBits<float>(FPBits<float>::minNormal));
   auto divfunc = [](float a, float b) {
     __asm__ __volatile__("ldr  s0, %0\n\t"
                          "ldr  s1, %1\n\t"

From d6de1e1a71406c75a4ea4d5a2fe84289f07ea3a1 Mon Sep 17 00:00:00 2001
From: Serge Guelton <sguelton@redhat.com>
Date: Wed, 24 Mar 2021 16:45:04 -0400
Subject: [PATCH 21/52] Normalize interaction with boolean attributes

Such attributes can either be unset, or set to "true" or "false" (as string).
Throughout the codebase, this led to inelegant checks ranging from

        if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")

to

        if (Fn->hasAttribute("no-jump-tables") && Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")

Introduce a getValueAsBool that normalizes the check, with the following
behavior:

no attributes or attribute set to "false" => return false
attribute set to "true" => return true

Differential Revision: https://reviews.llvm.org/D99299
---
 clang/lib/CodeGen/CodeGenFunction.cpp             |  2 +-
 llvm/include/llvm/CodeGen/TargetLowering.h        |  2 +-
 llvm/include/llvm/IR/Attributes.h                 |  4 ++++
 llvm/lib/Analysis/IVDescriptors.cpp               |  4 ++--
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp        |  2 +-
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  |  2 +-
 llvm/lib/IR/AttributeImpl.h                       |  1 +
 llvm/lib/IR/Attributes.cpp                        | 12 ++++++++++++
 llvm/lib/IR/Verifier.cpp                          | 15 ++++++++++++++-
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp   |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp         |  2 +-
 .../Target/AMDGPU/AMDGPULowerKernelAttributes.cpp |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp  |  6 ++----
 llvm/lib/Target/ARM/ARMTargetMachine.cpp          |  3 +--
 llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp  |  3 +--
 llvm/lib/Target/M68k/M68kISelLowering.cpp         |  2 +-
 llvm/lib/Target/Mips/MipsTargetMachine.cpp        |  4 +---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp       |  9 +--------
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp      |  3 +--
 llvm/lib/Target/Sparc/SparcTargetMachine.cpp      |  4 +---
 llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp  |  4 +---
 llvm/lib/Target/TargetMachine.cpp                 |  2 +-
 llvm/lib/Target/X86/X86TargetMachine.cpp          |  3 +--
 .../Instrumentation/MemorySanitizer.cpp           |  2 +-
 .../Scalar/TailRecursionElimination.cpp           |  2 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp         |  2 +-
 llvm/test/Verifier/invalid-strbool-attr.ll        |  9 +++++++++
 27 files changed, 64 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/Verifier/invalid-strbool-attr.ll

diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index ca59dc4a2b61..09d49c9aa018 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -174,7 +174,7 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) {
 
   auto mergeFnAttrValue = [&](StringRef Name, bool Value) {
     auto OldValue =
-        CGF.CurFn->getFnAttribute(Name).getValueAsString() == "true";
+        CGF.CurFn->getFnAttribute(Name).getValueAsBool();
     auto NewValue = OldValue & Value;
     if (OldValue != NewValue)
       CGF.CurFn->addFnAttr(Name, llvm::toStringRef(NewValue));
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4b964dc26218..7c7778728f48 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1144,7 +1144,7 @@ class TargetLoweringBase {
 
   /// Return true if lowering to a jump table is allowed.
   virtual bool areJTsAllowed(const Function *Fn) const {
-    if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
+    if (Fn->getFnAttribute("no-jump-tables").getValueAsBool())
       return false;
 
     return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 8a87a56099e2..50047e25f69d 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -168,6 +168,10 @@ class Attribute {
   /// attribute be an integer attribute.
   uint64_t getValueAsInt() const;
 
+  /// Return the attribute's value as a boolean. This requires that the
+  /// attribute be a string attribute.
+  bool getValueAsBool() const;
+
   /// Return the attribute's kind as a string. This requires the
   /// attribute to be a string attribute.
   StringRef getKindAsString() const;
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 3427150288b8..945d0d30685e 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -653,9 +653,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
   Function &F = *Header->getParent();
   FastMathFlags FMF;
   FMF.setNoNaNs(
-      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true");
+      F.getFnAttribute("no-nans-fp-math").getValueAsBool());
   FMF.setNoSignedZeros(
-      F.getFnAttribute("no-signed-zeros-fp-math").getValueAsString() == "true");
+      F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
 
   if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
     LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 79ac13cce5d6..faf3a848a69d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1145,7 +1145,7 @@ bool FastISel::lowerCall(const CallInst *CI) {
     IsTailCall = false;
   if (IsTailCall && MF->getFunction()
                             .getFnAttribute("disable-tail-calls")
-                            .getValueAsString() == "true")
+                            .getValueAsBool())
     IsTailCall = false;
 
   CallLoweringInfo CLI;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fcb8d9b06847..870c4bf5e5a0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -53,7 +53,7 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
   const Function &F = DAG.getMachineFunction().getFunction();
 
   // First, check if tail calls have been disabled in this function.
-  if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
     return false;
 
   // Conservatively require the attributes of the call to match those of
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 60e2ec2c21be..3b297a9e3908 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -64,6 +64,7 @@ class AttributeImpl : public FoldingSetNode {
 
   Attribute::AttrKind getKindAsEnum() const;
   uint64_t getValueAsInt() const;
+  bool getValueAsBool() const;
 
   StringRef getKindAsString() const;
   StringRef getValueAsString() const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 60ad3b8d3ccd..30730a4374a5 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -287,6 +287,13 @@ uint64_t Attribute::getValueAsInt() const {
   return pImpl->getValueAsInt();
 }
 
+bool Attribute::getValueAsBool() const {
+  if (!pImpl) return false;
+  assert(isStringAttribute() &&
+         "Expected the attribute to be a string attribute!");
+  return pImpl->getValueAsBool();
+}
+
 StringRef Attribute::getKindAsString() const {
   if (!pImpl) return {};
   assert(isStringAttribute() &&
@@ -650,6 +657,11 @@ uint64_t AttributeImpl::getValueAsInt() const {
   return static_cast<const IntAttributeImpl *>(this)->getValue();
 }
 
+bool AttributeImpl::getValueAsBool() const {
+  assert(getValueAsString().empty() || getValueAsString() == "false" || getValueAsString() == "true");
+  return getValueAsString() == "true";
+}
+
 StringRef AttributeImpl::getKindAsString() const {
   assert(isStringAttribute());
   return static_cast<const StringAttributeImpl *>(this)->getStringKind();
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index d0bfa7ecd099..9d3c791995b1 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1717,8 +1717,21 @@ static bool isFuncOrArgAttr(Attribute::AttrKind Kind) {
 void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction,
                                     const Value *V) {
   for (Attribute A : Attrs) {
-    if (A.isStringAttribute())
+
+    if (A.isStringAttribute()) {
+#define GET_ATTR_NAMES
+#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME)
+#define ATTRIBUTE_STRBOOL(ENUM_NAME, DISPLAY_NAME)                             \
+  if (A.getKindAsString() == #DISPLAY_NAME) {                                  \
+    auto V = A.getValueAsString();                                             \
+    if (!(V.empty() || V == "true" || V == "false"))                           \
+      CheckFailed("invalid value for '" #DISPLAY_NAME "' attribute: " + V +    \
+                  "");                                                         \
+  }
+
+#include "llvm/IR/Attributes.inc"
       continue;
+    }
 
     if (A.isIntAttribute() !=
         Attribute::doesAttrKindHaveArgument(A.getKindAsEnum())) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 2556996df97f..154d9c3a7fda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -809,7 +809,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
 
 static bool hasUnsafeFPMath(const Function &F) {
   Attribute Attr = F.getFnAttribute("unsafe-fp-math");
-  return Attr.getValueAsString() == "true";
+  return Attr.getValueAsBool();
 }
 
 static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 6b7f57252b7a..2b2242e8767c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -476,7 +476,7 @@ bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
       return true;
   const Function *F = CI->getParent()->getParent();
   Attribute Attr = F->getFnAttribute("unsafe-fp-math");
-  return Attr.getValueAsString() == "true";
+  return Attr.getValueAsBool();
 }
 
 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 9ab6a5246ce5..17b75e0fa4e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -67,7 +67,7 @@ static bool processUse(CallInst *CI) {
   const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3;
 
   const bool HasUniformWorkGroupSize =
-    F->getFnAttribute("uniform-work-group-size").getValueAsString() == "true";
+    F->getFnAttribute("uniform-work-group-size").getValueAsBool();
 
   if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize)
     return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 51344976466e..07806dd5a974 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -28,12 +28,10 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
   const Function &F = MF.getFunction();
 
   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
-  MemoryBound = MemBoundAttr.isStringAttribute() &&
-                MemBoundAttr.getValueAsString() == "true";
+  MemoryBound = MemBoundAttr.getValueAsBool();
 
   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
-  WaveLimiter = WaveLimitAttr.isStringAttribute() &&
-                WaveLimitAttr.getValueAsString() == "true";
+  WaveLimiter = WaveLimitAttr.getValueAsBool();
 
   CallingConv::ID CC = F.getCallingConv();
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index c09df077e257..033b98a82715 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -274,8 +274,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
   // function before we can generate a subtarget. We also need to use
   // it as a key for the subtarget since that can be the only difference
   // between two functions.
-  bool SoftFloat =
-      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
   // If the soft float attribute is set on the function turn on the soft float
   // subtarget feature.
   if (SoftFloat)
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 9195bb3dc725..9c0bcd96b227 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -251,8 +251,7 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
   // Creating a separate target feature is not strictly necessary, it only
   // exists to make "unsafe-fp-math" force creating a new subtarget.
 
-  if (FnAttrs.hasFnAttribute("unsafe-fp-math") &&
-      F.getFnAttribute("unsafe-fp-math").getValueAsString() == "true")
+  if (F.getFnAttribute("unsafe-fp-math").getValueAsBool())
     FS = FS.empty() ? "+unsafe-fp" : "+unsafe-fp," + FS;
 
   auto &I = SubtargetMap[CPU + FS];
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 8402bbbeefff..7c2e253a37c8 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -487,7 +487,7 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     report_fatal_error("M68k interrupts may not be called directly");
 
   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
-  if (Attr.getValueAsString() == "true")
+  if (Attr.getValueAsBool())
     IsTailCall = false;
 
   // FIXME Add tailcalls support
diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 5b0b11089a6c..d4a71867630d 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -176,9 +176,7 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
   // FIXME: This is related to the code below to reset the target options,
   // we need to know whether or not the soft float flag is set on the
   // function, so we can enable it as a subtarget feature.
-  bool softFloat =
-      F.hasFnAttribute("use-soft-float") &&
-      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  bool softFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
 
   if (hasMips16Attr)
     FS += FS.empty() ? "+mips16" : ",+mips16";
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8860e90f2806..6a9b25f044cf 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4304,14 +4304,7 @@ bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
 
   // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
   const Function &F = MF.getFunction();
-  if (F.hasFnAttribute("unsafe-fp-math")) {
-    Attribute Attr = F.getFnAttribute("unsafe-fp-math");
-    StringRef Val = Attr.getValueAsString();
-    if (Val == "true")
-      return true;
-  }
-
-  return false;
+  return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
 }
 
 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 32b19d5ddd10..a4cfd0ade863 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -343,8 +343,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
   // function before we can generate a subtarget. We also need to use
   // it as a key for the subtarget since that can be the only difference
   // between two functions.
-  bool SoftFloat =
-      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
   // If the soft float attribute is set on the function turn on the soft float
   // subtarget feature.
   if (SoftFloat)
diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index ae5228db5827..083339bc157c 100644
--- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -117,9 +117,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const {
   // FIXME: This is related to the code below to reset the target options,
   // we need to know whether or not the soft float flag is set on the
   // function, so we can enable it as a subtarget feature.
-  bool softFloat =
-      F.hasFnAttribute("use-soft-float") &&
-      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  bool softFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
 
   if (softFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 7b78dc4ad13a..ebb8ed97bb59 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -179,9 +179,7 @@ SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
   // FIXME: This is related to the code below to reset the target options,
   // we need to know whether or not the soft float flag is set on the
   // function, so we can enable it as a subtarget feature.
-  bool softFloat =
-    F.hasFnAttribute("use-soft-float") &&
-    F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  bool softFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
 
   if (softFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 2aee0e5c3fb8..0a655a82b889 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -56,7 +56,7 @@ bool TargetMachine::isPositionIndependent() const {
 void TargetMachine::resetTargetOptions(const Function &F) const {
 #define RESET_OPTION(X, Y)                                              \
   do {                                                                  \
-    Options.X = (F.getFnAttribute(Y).getValueAsString() == "true");     \
+    Options.X = F.getFnAttribute(Y).getValueAsBool();     \
   } while (0)
 
   RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 32b90e31bec3..ff99186609e9 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -294,8 +294,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
   // function before we can generate a subtarget. We also need to use
   // it as a key for the subtarget since that can be the only difference
   // between two functions.
-  bool SoftFloat =
-      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
   // If the soft float attribute is set on the function turn on the soft float
   // subtarget feature.
   if (SoftFloat)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 64574a6aa231..3e4fae586aae 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -5028,7 +5028,7 @@ struct VarArgSystemZHelper : public VarArgHelper {
   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
     bool IsSoftFloatABI = CB.getCalledFunction()
                               ->getFnAttribute("use-soft-float")
-                              .getValueAsString() == "true";
+                              .getValueAsBool();
     unsigned GpOffset = SystemZGpOffset;
     unsigned FpOffset = SystemZFpOffset;
     unsigned VrIndex = 0;
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 8cc649a8c1ed..801c9ef68bb1 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -799,7 +799,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
                                         AliasAnalysis *AA,
                                         OptimizationRemarkEmitter *ORE,
                                         DomTreeUpdater &DTU) {
-  if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
     return false;
 
   bool MadeChange = false;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 9fdcb76aafda..cb98227e0bf7 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5793,7 +5793,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   // Only build lookup table when we have a target that supports it or the
   // attribute is not set.
   if (!TTI.shouldBuildLookupTables() ||
-      (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true"))
+      (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
     return false;
 
   // FIXME: If the switch is too sparse for a lookup table, perhaps we could
diff --git a/llvm/test/Verifier/invalid-strbool-attr.ll b/llvm/test/Verifier/invalid-strbool-attr.ll
new file mode 100644
index 000000000000..672c9e4e9db5
--- /dev/null
+++ b/llvm/test/Verifier/invalid-strbool-attr.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: invalid value for 'no-jump-tables' attribute: yes
+
+define void @func() #0 {
+  ret void
+}
+
+attributes #0 = { "no-jump-tables"="yes" }

From 7c74ce3c686938e95a08a05ea1e2a714eac43167 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sat, 17 Apr 2021 00:29:51 -0700
Subject: [PATCH 22/52] [ELF] --wrap: don't clear sym->isUsedInRegularObj if
 real->isUsedInRegularObj; set wrap's initial binding to sym's

Fix PR49897: if `__real_foo` has the isUsedInRegularObj bit set, we need to
retain `foo` in .symtab, even if `foo` is undefined. The new behavior will match
GNU ld.

Before the patch, we produced an R_X86_64_JUMP_SLOT relocation referencing the
index 0 undefined symbol, which would be rejected with an error by glibc
(see f96ff3c0f8ebd941b3f6b345164c3d858b781484).

While here, fix another bug: if `__wrap_foo` does not exist, its initial binding
should be `foo`'s.
---
 lld/ELF/Driver.cpp                       |  8 +++---
 lld/ELF/SymbolTable.cpp                  |  2 +-
 lld/test/ELF/Inputs/wrap-dynamic-undef.s |  2 --
 lld/test/ELF/wrap-dynamic-undef.s        | 34 ++++++++++++++++++++----
 4 files changed, 35 insertions(+), 11 deletions(-)
 delete mode 100644 lld/test/ELF/Inputs/wrap-dynamic-undef.s

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 2daee212cc00..1b840c166e09 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1888,8 +1888,9 @@ static Symbol *addUndefined(StringRef name) {
       Undefined{nullptr, name, STB_GLOBAL, STV_DEFAULT, 0});
 }
 
-static Symbol *addUnusedUndefined(StringRef name) {
-  Undefined sym{nullptr, name, STB_GLOBAL, STV_DEFAULT, 0};
+static Symbol *addUnusedUndefined(StringRef name,
+                                  uint8_t binding = STB_GLOBAL) {
+  Undefined sym{nullptr, name, binding, STV_DEFAULT, 0};
   sym.isUsedInRegularObj = false;
   return symtab->addSymbol(sym);
 }
@@ -1953,7 +1954,8 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
       continue;
 
     Symbol *real = addUnusedUndefined(saver.save("__real_" + name));
-    Symbol *wrap = addUnusedUndefined(saver.save("__wrap_" + name));
+    Symbol *wrap =
+        addUnusedUndefined(saver.save("__wrap_" + name), sym->binding);
     v.push_back({sym, real, wrap});
 
     // We want to tell LTO not to inline symbols to be overwritten
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 6283d943984a..70aea288c53f 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -42,7 +42,7 @@ void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
 
   if (real->exportDynamic)
     sym->exportDynamic = true;
-  if (sym->isUndefined())
+  if (!real->isUsedInRegularObj && sym->isUndefined())
     sym->isUsedInRegularObj = false;
 
   // Now renaming is complete, and no one refers to real. We drop real from
diff --git a/lld/test/ELF/Inputs/wrap-dynamic-undef.s b/lld/test/ELF/Inputs/wrap-dynamic-undef.s
deleted file mode 100644
index ade79556db7b..000000000000
--- a/lld/test/ELF/Inputs/wrap-dynamic-undef.s
+++ /dev/null
@@ -1,2 +0,0 @@
-.global foo
-foo:
diff --git a/lld/test/ELF/wrap-dynamic-undef.s b/lld/test/ELF/wrap-dynamic-undef.s
index af2871cfe6ea..ca1698173ef5 100644
--- a/lld/test/ELF/wrap-dynamic-undef.s
+++ b/lld/test/ELF/wrap-dynamic-undef.s
@@ -1,9 +1,10 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t1.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/wrap-dynamic-undef.s -o %t2.o
-# RUN: ld.lld %t2.o -o %t2.so -shared
-# RUN: ld.lld %t1.o %t2.so -o %t --wrap foo
-# RUN: llvm-readelf --dyn-syms %t | FileCheck %s
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/def.s -o %t/def.o
+# RUN: ld.lld %t/def.o -o %t/def.so -shared --soname=def
+# RUN: ld.lld %t/a.o %t/def.so -o %t1 --wrap foo
+# RUN: llvm-readelf --dyn-syms %t1 | FileCheck %s
 
 # Test that the dynamic relocation uses foo. We used to produce a
 # relocation with __real_foo.
@@ -12,6 +13,29 @@
 # CHECK:      NOTYPE  LOCAL  DEFAULT  UND
 # CHECK-NEXT: NOTYPE  GLOBAL DEFAULT  UND foo
 
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/b.s -o %t/b.o
+# RUN: ld.lld -shared --wrap foo %t/b.o -o %t2.so
+# RUN: llvm-readelf --dyn-syms %t2.so | FileCheck %s --check-prefix=SYM2
+
+# SYM2:      Symbol table '.dynsym' contains 4 entries:
+# SYM2:      NOTYPE  LOCAL  DEFAULT   UND
+# SYM2-NEXT: NOTYPE  WEAK   DEFAULT   UND foo
+# SYM2-NEXT: NOTYPE  WEAK   DEFAULT   UND __wrap_foo
+# SYM2-NEXT: NOTYPE  GLOBAL DEFAULT [[#]] _start
+
+#--- a.s
 .global _start
 _start:
 	callq	__real_foo@plt
+
+#--- def.s
+.globl foo
+foo:
+
+#--- b.s
+.weak foo
+.weak __real_foo
+.global _start
+_start:
+  call __real_foo@plt
+  call foo@plt

From 4583759414572046284619cb1f45eb52c866ee8a Mon Sep 17 00:00:00 2001
From: David Carlier <devnexen@gmail.com>
Date: Sat, 17 Apr 2021 11:08:00 +0100
Subject: [PATCH 23/52] [Sanitizers] Undefined Behavior Sanitizer support for
 DragonFlyBSD

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D89631
---
 compiler-rt/lib/ubsan/ubsan_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/ubsan/ubsan_platform.h b/compiler-rt/lib/ubsan/ubsan_platform.h
index 32d949d75b9c..9d67041cda4e 100644
--- a/compiler-rt/lib/ubsan/ubsan_platform.h
+++ b/compiler-rt/lib/ubsan/ubsan_platform.h
@@ -14,7 +14,7 @@
 
 // Other platforms should be easy to add, and probably work as-is.
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) ||        \
-    defined(__NetBSD__) || \
+    defined(__NetBSD__) || defined(__DragonFly__) \
     (defined(__sun__) && defined(__svr4__)) || \
     defined(_WIN32) || defined(__Fuchsia__) || defined(__rtems__)
 # define CAN_SANITIZE_UB 1

From 0df0d6acea3365f4039cb889b787f02664e07032 Mon Sep 17 00:00:00 2001
From: David Carlier <devnexen@gmail.com>
Date: Sat, 17 Apr 2021 11:10:35 +0100
Subject: [PATCH 24/52] [Sanitizers] DragonFlyBSD adding support for builtins

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D89653
---
 compiler-rt/lib/builtins/atomic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/atomic.c b/compiler-rt/lib/builtins/atomic.c
index f48cdc10ccf7..2d109d2d1bca 100644
--- a/compiler-rt/lib/builtins/atomic.c
+++ b/compiler-rt/lib/builtins/atomic.c
@@ -52,7 +52,7 @@ static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
 // defined.  Each platform should define the Lock type, and corresponding
 // lock() and unlock() functions.
 ////////////////////////////////////////////////////////////////////////////////
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined(__DragonFly__)
 #include <errno.h>
 // clang-format off
 #include <sys/types.h>

From 61fc02dc037c61343ebc465301ad0d492912dae7 Mon Sep 17 00:00:00 2001
From: David CARLIER <devnexen@gmail.com>
Date: Sat, 17 Apr 2021 11:15:31 +0100
Subject: [PATCH 25/52] [Sanitizers] Fix build

---
 compiler-rt/lib/ubsan/ubsan_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/ubsan/ubsan_platform.h b/compiler-rt/lib/ubsan/ubsan_platform.h
index 9d67041cda4e..51e535d1e222 100644
--- a/compiler-rt/lib/ubsan/ubsan_platform.h
+++ b/compiler-rt/lib/ubsan/ubsan_platform.h
@@ -14,7 +14,7 @@
 
 // Other platforms should be easy to add, and probably work as-is.
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) ||        \
-    defined(__NetBSD__) || defined(__DragonFly__) \
+    defined(__NetBSD__) || defined(__DragonFly__) || \
     (defined(__sun__) && defined(__svr4__)) || \
     defined(_WIN32) || defined(__Fuchsia__) || defined(__rtems__)
 # define CAN_SANITIZE_UB 1

From 595394321d51c49e317a8a8da944e203f9b8633c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 17 Apr 2021 12:00:38 +0100
Subject: [PATCH 26/52] [Support] AbsoluteDifference - add brackets to appease
 static analyzer warning. NFCI.

---
 llvm/include/llvm/Support/MathExtras.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 10d4260a7eb1..753b1998c40c 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -792,7 +792,7 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) {
 /// value of the result.
 template <typename T>
 std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
-  return X > Y ? X - Y : Y - X;
+  return X > Y ? (X - Y) : (Y - X);
 }
 
 /// Add two unsigned integers, X and Y, of type T.  Clamp the result to the

From e68b12c99eaf9cdfdf3e3e7c4533bb03b60afd36 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 17 Apr 2021 14:51:29 +0200
Subject: [PATCH 27/52] [Inline] Don't add noalias metadata to
 inaccessiblememonly calls

It will not do anything useful for them, as we already know that
they don't modref with any accessible memory.

In particular, this prevents noalias metadata from being placed
on noalias.scope.decl intrinsics. This reduces the amount of
metadata needed, and makes it more likely that unnecessary decls
can be eliminated.
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |   5 +
 llvm/test/Transforms/Inline/noalias-calls2.ll | 100 +++++++++---------
 llvm/test/Transforms/Inline/noalias2.ll       |  39 ++++---
 3 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 60fc5b2e0a0d..201e4e1c58da 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1041,6 +1041,11 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
         IsFuncCall = true;
         if (CalleeAAR) {
           FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
+
+          // We'll retain this knowledge without additional metadata.
+          if (AAResults::onlyAccessesInaccessibleMem(MRB))
+            continue;
+
           if (AAResults::onlyAccessesArgPointees(MRB))
             IsArgMemOnlyCall = true;
         }
diff --git a/llvm/test/Transforms/Inline/noalias-calls2.ll b/llvm/test/Transforms/Inline/noalias-calls2.ll
index 400bb1d6147c..28450021afee 100644
--- a/llvm/test/Transforms/Inline/noalias-calls2.ll
+++ b/llvm/test/Transforms/Inline/noalias-calls2.ll
@@ -29,10 +29,10 @@ define void @caller_equals_callee(i32* noalias %p0, i32* noalias %p1, i32 %cnt)
 ; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !8)
 ; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR2]], i64 2
 ; CHECK-NEXT:    [[ADD_PTR1_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR3]], i64 2
-; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata !10), !noalias !13
-; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata !14), !noalias !13
-; CHECK-NEXT:    store i32 10, i32* [[ADD_PTR_I]], align 4, !alias.scope !16, !noalias !17
-; CHECK-NEXT:    store i32 20, i32* [[ADD_PTR1_I]], align 4, !alias.scope !17, !noalias !16
+; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata !10)
+; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata !13)
+; CHECK-NEXT:    store i32 10, i32* [[ADD_PTR_I]], align 4, !alias.scope !15, !noalias !16
+; CHECK-NEXT:    store i32 20, i32* [[ADD_PTR1_I]], align 4, !alias.scope !16, !noalias !15
 ; CHECK-NEXT:    store i32 11, i32* [[ADD_PTR2]], align 4, !alias.scope !5, !noalias !8
 ; CHECK-NEXT:    store i32 12, i32* [[P1]], align 4
 ; CHECK-NEXT:    br label [[IF_END]]
@@ -71,32 +71,32 @@ define void @test01(i32* noalias %p0, i32* noalias %p1, i32 %cnt) {
 ; CHECK-NEXT:    store i32 13, i32* [[P0]], align 4
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 1
 ; CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds i32, i32* [[P1]], i64 1
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !18)
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !21)
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !17)
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !20)
 ; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR]], i64 2
 ; CHECK-NEXT:    [[ADD_PTR1_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR1]], i64 2
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !23), !noalias !26
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !27), !noalias !26
-; CHECK-NEXT:    store i32 10, i32* [[ADD_PTR_I]], align 4, !alias.scope !29, !noalias !30
-; CHECK-NEXT:    store i32 20, i32* [[ADD_PTR1_I]], align 4, !alias.scope !30, !noalias !29
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !22)
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !25)
+; CHECK-NEXT:    store i32 10, i32* [[ADD_PTR_I]], align 4, !alias.scope !27, !noalias !28
+; CHECK-NEXT:    store i32 20, i32* [[ADD_PTR1_I]], align 4, !alias.scope !28, !noalias !27
 ; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[CNT]], 0
 ; CHECK-NEXT:    br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]]
 ; CHECK:       if.then.i:
-; CHECK-NEXT:    store i32 11, i32* [[ADD_PTR]], align 4, !alias.scope !18, !noalias !21
+; CHECK-NEXT:    store i32 11, i32* [[ADD_PTR]], align 4, !alias.scope !17, !noalias !20
 ; CHECK-NEXT:    br label [[CALLER_EQUALS_CALLEE_EXIT:%.*]]
 ; CHECK:       if.else.i:
 ; CHECK-NEXT:    [[ADD_PTR2_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR1]], i64 1
 ; CHECK-NEXT:    [[ADD_PTR3_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR]], i64 1
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !31), !noalias !26
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !34), !noalias !26
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !29)
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !32)
 ; CHECK-NEXT:    [[ADD_PTR_I_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR2_I]], i64 2
 ; CHECK-NEXT:    [[ADD_PTR1_I_I:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR3_I]], i64 2
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !36), !noalias !39
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !40), !noalias !39
-; CHECK-NEXT:    store i32 10, i32* [[ADD_PTR_I_I]], align 4, !alias.scope !42, !noalias !43
-; CHECK-NEXT:    store i32 20, i32* [[ADD_PTR1_I_I]], align 4, !alias.scope !43, !noalias !42
-; CHECK-NEXT:    store i32 11, i32* [[ADD_PTR2_I]], align 4, !alias.scope !44, !noalias !45
-; CHECK-NEXT:    store i32 12, i32* [[ADD_PTR1]], align 4, !alias.scope !21, !noalias !18
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !34)
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !37)
+; CHECK-NEXT:    store i32 10, i32* [[ADD_PTR_I_I]], align 4, !alias.scope !39, !noalias !40
+; CHECK-NEXT:    store i32 20, i32* [[ADD_PTR1_I_I]], align 4, !alias.scope !40, !noalias !39
+; CHECK-NEXT:    store i32 11, i32* [[ADD_PTR2_I]], align 4, !alias.scope !41, !noalias !42
+; CHECK-NEXT:    store i32 12, i32* [[ADD_PTR1]], align 4, !alias.scope !20, !noalias !17
 ; CHECK-NEXT:    br label [[CALLER_EQUALS_CALLEE_EXIT]]
 ; CHECK:       caller_equals_callee.exit:
 ; CHECK-NEXT:    ret void
@@ -130,37 +130,33 @@ attributes #0 = { inaccessiblememonly nofree nosync nounwind willreturn }
 ; CHECK: !10 = !{!11}
 ; CHECK: !11 = distinct !{!11, !12, !"do_store: %p0"}
 ; CHECK: !12 = distinct !{!12, !"do_store"}
-; CHECK: !13 = !{!6, !9}
-; CHECK: !14 = !{!15}
-; CHECK: !15 = distinct !{!15, !12, !"do_store: %p1"}
-; CHECK: !16 = !{!11, !6}
-; CHECK: !17 = !{!15, !9}
-
-; CHECK: !18 = !{!19}
-; CHECK: !19 = distinct !{!19, !20, !"caller_equals_callee: %p0"}
-; CHECK: !20 = distinct !{!20, !"caller_equals_callee"}
-; CHECK: !21 = !{!22}
-; CHECK: !22 = distinct !{!22, !20, !"caller_equals_callee: %p1"}
-; CHECK: !23 = !{!24}
-; CHECK: !24 = distinct !{!24, !25, !"do_store: %p0"}
-; CHECK: !25 = distinct !{!25, !"do_store"}
-; CHECK: !26 = !{!19, !22}
-; CHECK: !27 = !{!28}
-; CHECK: !28 = distinct !{!28, !25, !"do_store: %p1"}
-; CHECK: !29 = !{!24, !19}
-; CHECK: !30 = !{!28, !22}
-; CHECK: !31 = !{!32}
-; CHECK: !32 = distinct !{!32, !33, !"caller_equals_callee: %p0"}
-; CHECK: !33 = distinct !{!33, !"caller_equals_callee"}
+; CHECK: !13 = !{!14}
+; CHECK: !14 = distinct !{!14, !12, !"do_store: %p1"}
+; CHECK: !15 = !{!11, !6}
+; CHECK: !16 = !{!14, !9}
+; CHECK: !17 = !{!18}
+; CHECK: !18 = distinct !{!18, !19, !"caller_equals_callee: %p0"}
+; CHECK: !19 = distinct !{!19, !"caller_equals_callee"}
+; CHECK: !20 = !{!21}
+; CHECK: !21 = distinct !{!21, !19, !"caller_equals_callee: %p1"}
+; CHECK: !22 = !{!23}
+; CHECK: !23 = distinct !{!23, !24, !"do_store: %p0"}
+; CHECK: !24 = distinct !{!24, !"do_store"}
+; CHECK: !25 = !{!26}
+; CHECK: !26 = distinct !{!26, !24, !"do_store: %p1"}
+; CHECK: !27 = !{!23, !18}
+; CHECK: !28 = !{!26, !21}
+; CHECK: !29 = !{!30}
+; CHECK: !30 = distinct !{!30, !31, !"caller_equals_callee: %p0"}
+; CHECK: !31 = distinct !{!31, !"caller_equals_callee"}
+; CHECK: !32 = !{!33}
+; CHECK: !33 = distinct !{!33, !31, !"caller_equals_callee: %p1"}
 ; CHECK: !34 = !{!35}
-; CHECK: !35 = distinct !{!35, !33, !"caller_equals_callee: %p1"}
-; CHECK: !36 = !{!37}
-; CHECK: !37 = distinct !{!37, !38, !"do_store: %p0"}
-; CHECK: !38 = distinct !{!38, !"do_store"}
-; CHECK: !39 = !{!32, !35, !19, !22}
-; CHECK: !40 = !{!41}
-; CHECK: !41 = distinct !{!41, !38, !"do_store: %p1"}
-; CHECK: !42 = !{!37, !32, !22}
-; CHECK: !43 = !{!41, !35, !19}
-; CHECK: !44 = !{!32, !22}
-; CHECK: !45 = !{!35, !19}
+; CHECK: !35 = distinct !{!35, !36, !"do_store: %p0"}
+; CHECK: !36 = distinct !{!36, !"do_store"}
+; CHECK: !37 = !{!38}
+; CHECK: !38 = distinct !{!38, !36, !"do_store: %p1"}
+; CHECK: !39 = !{!35, !30, !21}
+; CHECK: !40 = !{!38, !33, !18}
+; CHECK: !41 = !{!30, !21}
+; CHECK: !42 = !{!33, !18}
diff --git a/llvm/test/Transforms/Inline/noalias2.ll b/llvm/test/Transforms/Inline/noalias2.ll
index c2c743605699..58ef1cb88ce2 100644
--- a/llvm/test/Transforms/Inline/noalias2.ll
+++ b/llvm/test/Transforms/Inline/noalias2.ll
@@ -71,21 +71,21 @@ define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture rea
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !5)
 ; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !8)
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !10) [[ATTR2:#.*]], !noalias !13
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !14) [[ATTR2]], !noalias !13
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[C]], align 4, !alias.scope !16, !noalias !17
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !10) [[ATTR2:#.*]]
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !13) [[ATTR2]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[C]], align 4, !alias.scope !15, !noalias !16
 ; CHECK-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5
-; CHECK-NEXT:    store float [[TMP0]], float* [[ARRAYIDX_I_I]], align 4, !alias.scope !17, !noalias !16
+; CHECK-NEXT:    store float [[TMP0]], float* [[ARRAYIDX_I_I]], align 4, !alias.scope !16, !noalias !15
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[C]], align 4, !alias.scope !8, !noalias !5
 ; CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 7
 ; CHECK-NEXT:    store float [[TMP1]], float* [[ARRAYIDX_I]], align 4, !alias.scope !5, !noalias !8
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !18)
-; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !21)
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[C]], align 4, !noalias !23
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !17)
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !20)
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[C]], align 4, !noalias !22
 ; CHECK-NEXT:    [[ARRAYIDX_I1:%.*]] = getelementptr inbounds float, float* [[A]], i64 6
-; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX_I1]], align 4, !alias.scope !18, !noalias !21
+; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX_I1]], align 4, !alias.scope !17, !noalias !20
 ; CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
-; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX1_I]], align 4, !alias.scope !21, !noalias !18
+; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX1_I]], align 4, !alias.scope !20, !noalias !17
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[C]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7
 ; CHECK-NEXT:    store float [[TMP3]], float* [[ARRAYIDX]], align 4
@@ -113,17 +113,16 @@ entry:
 ; CHECK: !10 = !{!11}
 ; CHECK: !11 = distinct !{!11, !12, !"hello: %a"}
 ; CHECK: !12 = distinct !{!12, !"hello"}
-; CHECK: !13 = !{!6, !9}
-; CHECK: !14 = !{!15}
-; CHECK: !15 = distinct !{!15, !12, !"hello: %c"}
-; CHECK: !16 = !{!15, !9}
-; CHECK: !17 = !{!11, !6}
-; CHECK: !18 = !{!19}
-; CHECK: !19 = distinct !{!19, !20, !"hello2: %a"}
-; CHECK: !20 = distinct !{!20, !"hello2"}
-; CHECK: !21 = !{!22}
-; CHECK: !22 = distinct !{!22, !20, !"hello2: %b"}
-; CHECK: !23 = !{!19, !22}
+; CHECK: !13 = !{!14}
+; CHECK: !14 = distinct !{!14, !12, !"hello: %c"}
+; CHECK: !15 = !{!14, !9}
+; CHECK: !16 = !{!11, !6}
+; CHECK: !17 = !{!18}
+; CHECK: !18 = distinct !{!18, !19, !"hello2: %a"}
+; CHECK: !19 = distinct !{!19, !"hello2"}
+; CHECK: !20 = !{!21}
+; CHECK: !21 = distinct !{!21, !19, !"hello2: %b"}
+; CHECK: !22 = !{!18, !21}
 
 attributes #0 = { nounwind uwtable }
 

From af523514c4b9e0bd04bffb1f6ca2922c83df4c36 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sat, 17 Apr 2021 13:12:15 +0100
Subject: [PATCH 28/52] [SimplifyCFG] Skip dbg intrinsics when checking for
 branch-only BBs.

Debug intrinsics are free to hoist and should be skipped when looking
for terminator-only blocks. As a consequence, we have to delegate to the
main hoisting loop to hoist any dbg intrinsics instead of jumping to the
terminator case directly.

This fixes PR49982.

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D100640
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp      | 10 +++++++---
 .../Transforms/SimplifyCFG/hoist-dbgvalue.ll   | 18 ++++++++----------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index cb98227e0bf7..3a2da84e0338 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1434,11 +1434,15 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
   // Check if only hoisting terminators is allowed. This does not add new
   // instructions to the hoist location.
   if (EqTermsOnly) {
-    if (!I1->isIdenticalToWhenDefined(I2))
+    // Skip any debug intrinsics, as they are free to hoist.
+    auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
+    auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
+    if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
       return false;
-    if (!I1->isTerminator())
+    if (!I1NonDbg->isTerminator())
       return false;
-    goto HoistTerminator;
+    // Now we know that we only need to hoist debug instrinsics and the
+    // terminator. Let the loop below handle those 2 cases.
   }
 
   do {
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index aff0ac318357..ca0c0b6140c8 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -45,8 +45,10 @@ define i1 @hoist_with_debug2(i32 %x) !dbg !22 {
 ; CHECK-LABEL: @hoist_with_debug2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ugt i32 [[X:%.*]], 2
-; CHECK-NEXT:    [[P:%.*]] = select i1 [[TOBOOL_NOT]], i1 false, i1 true
-; CHECK-NEXT:    ret i1 [[P]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META21]], metadata !DIExpression()), !dbg [[DBG23]]
+; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[TOBOOL_NOT]], i1 false, i1 true
+; CHECK-NEXT:    ret i1 [[DOT]]
 ;
 entry:
   %tobool.not = icmp ugt i32 %x, 2
@@ -72,15 +74,11 @@ define i16 @hoist_with_debug3_pr49982(i32 %x, i1 %c.2) !dbg !26 {
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[C_0:%.*]] = icmp sgt i32 [[X:%.*]], 0
-; CHECK-NEXT:    br i1 [[C_0]], label [[CHECK:%.*]], label [[LATCH:%.*]]
-; CHECK:       check:
-; CHECK-NEXT:    [[C_1:%.*]] = icmp ugt i32 [[X]], 2
-; CHECK-NEXT:    br label [[EXIT_1:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    br i1 [[C_2:%.*]], label [[EXIT_1]], label [[FOR_COND]]
+; CHECK-NEXT:    [[BRMERGE:%.*]] = or i1 [[C_0]], [[C_2:%.*]]
+; CHECK-NEXT:    [[DOTMUX:%.*]] = select i1 [[C_0]], i16 0, i16 20
+; CHECK-NEXT:    br i1 [[BRMERGE]], label [[EXIT_1:%.*]], label [[FOR_COND]]
 ; CHECK:       exit.1:
-; CHECK-NEXT:    [[MERGE:%.*]] = phi i16 [ 20, [[LATCH]] ], [ 0, [[CHECK]] ]
-; CHECK-NEXT:    ret i16 [[MERGE]]
+; CHECK-NEXT:    ret i16 [[DOTMUX]]
 ;
 entry:
   br label %for.cond

From ae2da68da62653eddc72084990ef75d39287cefd Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 17 Apr 2021 16:56:21 +0200
Subject: [PATCH 29/52] [LICM] Add more tests for promotion and capture (NFC)

We could optimize the first case, as the pointer is captured only
after the loop.
---
 llvm/test/Transforms/LICM/promote-capture.ll | 152 +++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100644 llvm/test/Transforms/LICM/promote-capture.ll

diff --git a/llvm/test/Transforms/LICM/promote-capture.ll b/llvm/test/Transforms/LICM/promote-capture.ll
new file mode 100644
index 000000000000..9ece3c01eddd
--- /dev/null
+++ b/llvm/test/Transforms/LICM/promote-capture.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s
+
+declare i1 @cond(i32 %v) readnone
+declare void @capture(i32* %p) readnone
+
+define void @test_captured_after_loop(i32 %len) {
+; CHECK-LABEL: @test_captured_after_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COUNT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, i32* [[COUNT]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[COND:%.*]] = call i1 @cond(i32 [[I]])
+; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
+; CHECK:       if:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[COUNT]], align 4
+; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C]], 1
+; CHECK-NEXT:    store i32 [[C_INC]], i32* [[COUNT]], align 4
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %count = alloca i32
+  store i32 0, i32* %count
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
+  %cond = call i1 @cond(i32 %i)
+  br i1 %cond, label %if, label %latch
+
+if:
+  %c = load i32, i32* %count
+  %c.inc = add i32 %c, 1
+  store i32 %c.inc, i32* %count
+  br label %latch
+
+latch:
+  %i.next = add nuw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %len
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  call void @capture(i32* %count)
+  ret void
+}
+
+define void @test_captured_in_loop(i32 %len) {
+; CHECK-LABEL: @test_captured_in_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COUNT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, i32* [[COUNT]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[COND:%.*]] = call i1 @cond(i32 [[I]])
+; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
+; CHECK:       if:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[COUNT]], align 4
+; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C]], 1
+; CHECK-NEXT:    store i32 [[C_INC]], i32* [[COUNT]], align 4
+; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %count = alloca i32
+  store i32 0, i32* %count
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
+  %cond = call i1 @cond(i32 %i)
+  br i1 %cond, label %if, label %latch
+
+if:
+  %c = load i32, i32* %count
+  %c.inc = add i32 %c, 1
+  store i32 %c.inc, i32* %count
+  call void @capture(i32* %count)
+  br label %latch
+
+latch:
+  %i.next = add nuw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %len
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @test_captured_before_loop(i32 %len) {
+; CHECK-LABEL: @test_captured_before_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COUNT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, i32* [[COUNT]], align 4
+; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[COND:%.*]] = call i1 @cond(i32 [[I]])
+; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
+; CHECK:       if:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[COUNT]], align 4
+; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C]], 1
+; CHECK-NEXT:    store i32 [[C_INC]], i32* [[COUNT]], align 4
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %count = alloca i32
+  store i32 0, i32* %count
+  call void @capture(i32* %count)
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
+  %cond = call i1 @cond(i32 %i)
+  br i1 %cond, label %if, label %latch
+
+if:
+  %c = load i32, i32* %count
+  %c.inc = add i32 %c, 1
+  store i32 %c.inc, i32* %count
+  br label %latch
+
+latch:
+  %i.next = add nuw i32 %i, 1
+  %cmp = icmp eq i32 %i.next, %len
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}

From 3597f02fd5c62f7c49c71b92e467128ffe2cf9cd Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
Date: Wed, 17 Mar 2021 14:31:06 +0000
Subject: [PATCH 30/52] [AMDGPU] Add GlobalDCE before internalization pass

The internalization pass only internalizes global variables
with no users. If the global variable has some dead user,
the internalization pass will not internalize it.

To be able to internalize global variables with dead
users, a global dce pass is needed before the
internalization pass.

This patch adds that.

Reviewed by: Artem Belevich, Matt Arsenault

Differential Revision: https://reviews.llvm.org/D98783
---
 clang/test/CodeGenCUDA/unused-global-var.cu   | 53 +++++++++++++++++++
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 ++
 2 files changed, 56 insertions(+)
 create mode 100644 clang/test/CodeGenCUDA/unused-global-var.cu

diff --git a/clang/test/CodeGenCUDA/unused-global-var.cu b/clang/test/CodeGenCUDA/unused-global-var.cu
new file mode 100644
index 000000000000..1dbb3a22563c
--- /dev/null
+++ b/clang/test/CodeGenCUDA/unused-global-var.cu
@@ -0,0 +1,53 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   -target-cpu gfx906 | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   -target-cpu gfx906 | FileCheck -check-prefix=NEGCHK %s
+
+#include "Inputs/cuda.h"
+
+// AMDGPU internalize unused global variables for whole-program compilation
+// (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), which are then
+// eliminated by global DCE. If there are invisible unused address space casts
+// for global variables, these dead users need to be eliminated by global
+// DCE before internalization. This test makes sure unused global variables
+// are eliminated.
+
+// Check unused device/constant variables are eliminated.
+
+// NEGCHK-NOT: @v1
+__device__ int v1;
+
+// NEGCHK-NOT: @v2
+__constant__ int v2;
+
+// NEGCHK-NOT: @_ZL2v3
+constexpr int v3 = 1;
+
+// Check managed variables are always kept.
+
+// CHECK-DAG: @v4
+__managed__ int v4;
+
+// Check used device/constant variables are not eliminated.
+// CHECK-DAG: @u1
+__device__ int u1;
+
+// CHECK-DAG: @u2
+__constant__ int u2;
+
+// Check u3 is kept because its address is taken.
+// CHECK-DAG: @_ZL2u3
+constexpr int u3 = 2;
+
+// Check u4 is not kept because it is not ODR-use.
+// NEGCHK-NOT: @_ZL2u4
+constexpr int u4 = 3;
+
+__device__ int fun1(const int& x);
+
+__global__ void kern1(int *x) {
+  *x = u1 + u2 + fun1(u3) + u4;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ce39609da303..1b3b56f5dc71 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -575,6 +575,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
         PM.addPass(AMDGPUPrintfRuntimeBindingPass());
 
         if (InternalizeSymbols) {
+          // Global variables may have dead uses which need to be removed.
+          // Otherwise these useless global variables will not get internalized.
+          PM.addPass(GlobalDCEPass());
           PM.addPass(InternalizePass(mustPreserveGV));
         }
         PM.addPass(AMDGPUPropagateAttributesLatePass(*this));

From d5c0f00e216aa6797499bb4c8aacac930d8a819b Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
Date: Wed, 17 Mar 2021 16:14:03 -0400
Subject: [PATCH 31/52] [CUDA][HIP] Mark device var used by host only

Add device variables to llvm.compiler.used if they are
ODR-used by either host or device functions.

This is necessary to prevent them from being
eliminated by whole-program optimization
where the compiler has no way to know a device
variable is used by some host code.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D98814
---
 clang/lib/CodeGen/CGCUDANV.cpp                | 22 +++++++++
 .../test/CodeGenCUDA/host-used-device-var.cu  | 47 +++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 clang/test/CodeGenCUDA/host-used-device-var.cu

diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index b224de7c197a..27fe048f827d 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1089,6 +1089,28 @@ void CGNVCUDARuntime::transformManagedVars() {
 llvm::Function *CGNVCUDARuntime::finalizeModule() {
   if (CGM.getLangOpts().CUDAIsDevice) {
     transformManagedVars();
+
+    // Mark ODR-used device variables as compiler used to prevent them from
+    // being eliminated by optimization. This is necessary for device variables
+    // ODR-used by host functions. Sema correctly marks them as ODR-used no
+    // matter whether they are ODR-used by device or host functions.
+    //
+    // We do not need to do this if the variable has used attribute since it
+    // has already been added.
+    //
+    // Static device variables have been externalized at this point, therefore
+    // variables with LLVM private or internal linkage need not be added.
+    for (auto &&Info : DeviceVars) {
+      auto Kind = Info.Flags.getKind();
+      if (!Info.Var->isDeclaration() &&
+          !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
+          (Kind == DeviceVarFlags::Variable ||
+           Kind == DeviceVarFlags::Surface ||
+           Kind == DeviceVarFlags::Texture) &&
+          Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
+        CGM.addCompilerUsedGlobal(Info.Var);
+      }
+    }
     return nullptr;
   }
   return makeModuleCtorFunction();
diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu
new file mode 100644
index 000000000000..fd501ed1f2fd
--- /dev/null
+++ b/clang/test/CodeGenCUDA/host-used-device-var.cu
@@ -0,0 +1,47 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// Check device variables used by neither host nor device functions are not kept.
+
+// CHECK-NOT: @v1
+__device__ int v1;
+
+// CHECK-NOT: @v2
+__constant__ int v2;
+
+// CHECK-NOT: @_ZL2v3
+static __device__ int v3;
+
+// Check device variables used by host functions are kept.
+
+// CHECK-DAG: @u1
+__device__ int u1;
+
+// CHECK-DAG: @u2
+__constant__ int u2;
+
+// Check host-used static device var is in llvm.compiler.used.
+// CHECK-DAG: @_ZL2u3
+static __device__ int u3;
+
+// Check device-used static device var is emitted but is not in llvm.compiler.used.
+// CHECK-DAG: @_ZL2u4
+static __device__ int u4;
+
+// Check device variables with used attribute are always kept.
+// CHECK-DAG: @u5
+__device__ __attribute__((used)) int u5;
+
+int fun1() {
+  return u1 + u2 + u3;
+}
+
+__global__ void kern1(int **x) {
+  *x = &u4;
+}
+// Check the exact list of variables to ensure @_ZL2u4 is not among them.
+// CHECK: @llvm.compiler.used = {{[^@]*}} @_ZL2u3 {{[^@]*}} @u1 {{[^@]*}} @u2 {{[^@]*}} @u5

From 12a1f1d9d7e4f7ce416d0602d18991973986dfb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Sat, 17 Apr 2021 17:27:11 +0200
Subject: [PATCH 32/52] [Pragma] Added support for GCC unroll/nounroll

GCC 8 introduced these new pragmas to control loop unrolling. We should support them for compatibility reasons and the implementation itself requires few lines of code, since everything needed is already implemented for #pragma unroll/nounroll.
---
 clang/include/clang/Basic/AttrDocs.td       |   4 +-
 clang/lib/Parse/ParsePragma.cpp             |   4 +
 clang/test/CodeGenCXX/pragma-gcc-unroll.cpp | 109 ++++++++++++++++++++
 3 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenCXX/pragma-gcc-unroll.cpp

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 867865e91056..0af5b790d8a3 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3243,7 +3243,9 @@ def UnrollHintDocs : Documentation {
   let Content = [{
 Loop unrolling optimization hints can be specified with ``#pragma unroll`` and
 ``#pragma nounroll``. The pragma is placed immediately before a for, while,
-do-while, or c++11 range-based for loop.
+do-while, or c++11 range-based for loop. GCC's loop unrolling hints
+``#pragma GCC unroll`` and ``#pragma GCC nounroll`` are also supported and have
+identical semantics to ``#pragma unroll`` and ``#pragma nounroll``.
 
 Specifying ``#pragma unroll`` without a parameter directs the loop unroller to
 attempt to fully unroll the loop if the trip count is known at compile time and
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
index 4ce8e4c4bb9d..660d317f57d0 100644
--- a/clang/lib/Parse/ParsePragma.cpp
+++ b/clang/lib/Parse/ParsePragma.cpp
@@ -405,9 +405,11 @@ void Parser::initializePragmaHandlers() {
 
   UnrollHintHandler = std::make_unique<PragmaUnrollHintHandler>("unroll");
   PP.AddPragmaHandler(UnrollHintHandler.get());
+  PP.AddPragmaHandler("GCC", UnrollHintHandler.get());
 
   NoUnrollHintHandler = std::make_unique<PragmaUnrollHintHandler>("nounroll");
   PP.AddPragmaHandler(NoUnrollHintHandler.get());
+  PP.AddPragmaHandler("GCC", NoUnrollHintHandler.get());
 
   UnrollAndJamHintHandler =
       std::make_unique<PragmaUnrollHintHandler>("unroll_and_jam");
@@ -523,9 +525,11 @@ void Parser::resetPragmaHandlers() {
   LoopHintHandler.reset();
 
   PP.RemovePragmaHandler(UnrollHintHandler.get());
+  PP.RemovePragmaHandler("GCC", UnrollHintHandler.get());
   UnrollHintHandler.reset();
 
   PP.RemovePragmaHandler(NoUnrollHintHandler.get());
+  PP.RemovePragmaHandler("GCC", NoUnrollHintHandler.get());
   NoUnrollHintHandler.reset();
 
   PP.RemovePragmaHandler(UnrollAndJamHintHandler.get());
diff --git a/clang/test/CodeGenCXX/pragma-gcc-unroll.cpp b/clang/test/CodeGenCXX/pragma-gcc-unroll.cpp
new file mode 100644
index 000000000000..ed75e0b6e3c3
--- /dev/null
+++ b/clang/test/CodeGenCXX/pragma-gcc-unroll.cpp
@@ -0,0 +1,109 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
+
+// Check that passing -fno-unroll-loops does not impact the decision made using pragmas.
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - -O1 -disable-llvm-optzns -fno-unroll-loops %s | FileCheck %s
+
+// Verify while loop is recognized after unroll pragma.
+void while_test(int *List, int Length) {
+  // CHECK: define {{.*}} @_Z10while_test
+  int i = 0;
+
+#pragma GCC unroll
+  while (i < Length) {
+    // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
+    List[i] = i * 2;
+    i++;
+  }
+}
+
+// Verify do loop is recognized after nounroll pragma.
+void do_test(int *List, int Length) {
+  // CHECK: define {{.*}} @_Z7do_test
+  int i = 0;
+
+#pragma GCC nounroll
+  do {
+    // CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+    List[i] = i * 2;
+    i++;
+  } while (i < Length);
+}
+
+// Verify for loop is recognized after unroll pragma.
+void for_test(int *List, int Length) {
+// CHECK: define {{.*}} @_Z8for_test
+#pragma GCC unroll 8
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+    List[i] = i * 2;
+  }
+}
+
+// Verify c++11 for range loop is recognized after unroll pragma.
+void for_range_test() {
+  // CHECK: define {{.*}} @_Z14for_range_test
+  double List[100];
+
+#pragma GCC unroll(4)
+  for (int i : List) {
+    // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+    List[i] = i;
+  }
+}
+
+#define UNROLLCOUNT 8
+
+// Verify defines are correctly resolved in unroll pragmas.
+void for_define_test(int *List, int Length, int Value) {
+// CHECK: define {{.*}} @_Z15for_define_test
+#pragma GCC unroll(UNROLLCOUNT)
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+    List[i] = i * Value;
+  }
+}
+
+// Verify metadata is generated when template is used.
+template <typename A>
+void for_template_test(A *List, int Length, A Value) {
+// CHECK: define {{.*}} @_Z13template_test
+#pragma GCC unroll 8
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_6:.*]]
+    List[i] = i * Value;
+  }
+}
+
+// Verify define is resolved correctly when template is used.
+template <typename A>
+void for_template_define_test(A *List, int Length, A Value) {
+// CHECK: define {{.*}} @_Z24for_template_define_test
+
+#pragma GCC unroll(UNROLLCOUNT)
+  for (int i = 0; i < Length; i++) {
+    // CHECK: br label {{.*}}, !llvm.loop ![[LOOP_7:.*]]
+    List[i] = i * Value;
+  }
+}
+
+#undef UNROLLCOUNT
+
+// Use templates defined above. Test verifies metadata is generated correctly.
+void template_test(double *List, int Length) {
+  double Value = 10;
+
+  for_template_test<double>(List, Length, Value);
+  for_template_define_test<double>(List, Length, Value);
+}
+
+// CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], [[MP:![0-9]+]], ![[UNROLL_ENABLE:.*]]}
+// CHECK: ![[UNROLL_ENABLE]] = !{!"llvm.loop.unroll.enable"}
+// CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2:.*]], ![[UNROLL_DISABLE:.*]]}
+// CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
+// CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], [[MP]], ![[UNROLL_8:.*]]}
+// CHECK: ![[UNROLL_8]] = !{!"llvm.loop.unroll.count", i32 8}
+// CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_4:.*]]}
+// CHECK: ![[UNROLL_4]] = !{!"llvm.loop.unroll.count", i32 4}
+// CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_7]] = distinct !{![[LOOP_7]], ![[UNROLL_8:.*]]}

From 6823af0ca858b54e09e5be61a19d067ccd0bd6b7 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu@amd.com>
Date: Fri, 16 Apr 2021 10:40:17 -0400
Subject: [PATCH 33/52] [HIP] Support hipRTC in header

hipRTC compiles HIP device code at run time. Since the system may not
have development tools installed, when a HIP program is compiled through
hipRTC, there is no standard C or C++ header available. As such, the HIP
headers should not depend on standard C or C++ headers when used
with hipRTC. Basically, when hipRTC is used, HIP headers only provide
definitions of HIP device API functions. This is in line with what nvRTC does.

This patch adds support of hipRTC to HIP headers in clang. Basically hipRTC
defines a macro __HIPCC_RTC__ when compiling HIP code at run time. When
this macro is defined, HIP headers do not include standard C/C++ headers.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D100652
---
 clang/lib/Headers/__clang_hip_cmath.h         |  2 ++
 clang/lib/Headers/__clang_hip_math.h          |  4 +++
 .../lib/Headers/__clang_hip_runtime_wrapper.h | 28 ++++++++++++++++++-
 clang/test/Headers/hip-header.hip             | 20 +++++++++++++
 clang/test/lit.cfg.py                         |  2 +-
 5 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Headers/hip-header.hip

diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h
index 18871e63bfa0..632d46e47f8b 100644
--- a/clang/lib/Headers/__clang_hip_cmath.h
+++ b/clang/lib/Headers/__clang_hip_cmath.h
@@ -14,6 +14,7 @@
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
+#if !defined(__HIPCC_RTC__)
 #if defined(__cplusplus)
 #include <limits>
 #include <type_traits>
@@ -21,6 +22,7 @@
 #endif
 #include <limits.h>
 #include <stdint.h>
+#endif // __HIPCC_RTC__
 
 #pragma push_macro("__DEVICE__")
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 14d91c66b352..35cf0ad3ba6c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -13,11 +13,13 @@
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
+#if !defined(__HIPCC_RTC__)
 #if defined(__cplusplus)
 #include <algorithm>
 #endif
 #include <limits.h>
 #include <stdint.h>
+#endif // __HIPCC_RTC__
 
 #pragma push_macro("__DEVICE__")
 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
@@ -1260,6 +1262,7 @@ float min(float __x, float __y) { return fminf(__x, __y); }
 __DEVICE__
 double min(double __x, double __y) { return fmin(__x, __y); }
 
+#if !defined(__HIPCC_RTC__)
 __host__ inline static int min(int __arg1, int __arg2) {
   return std::min(__arg1, __arg2);
 }
@@ -1267,6 +1270,7 @@ __host__ inline static int min(int __arg1, int __arg2) {
 __host__ inline static int max(int __arg1, int __arg2) {
   return std::max(__arg1, __arg2);
 }
+#endif // __HIPCC_RTC__
 #endif
 
 #pragma pop_macro("__DEVICE__")
diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
index 4fd8f23d49f3..8ee5566b33cf 100644
--- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -18,9 +18,27 @@
 
 #if __HIP__
 
+#if !defined(__HIPCC_RTC__)
 #include <cmath>
 #include <cstdlib>
 #include <stdlib.h>
+#else
+typedef __SIZE_TYPE__ size_t;
+// Define macros which are needed to declare HIP device API's without standard
+// C/C++ headers. This is for readability so that these API's can be written
+// the same way as non-hipRTC use case. These macros need to be popped so that
+// they do not pollute users' name space.
+#pragma push_macro("NULL")
+#pragma push_macro("uint32_t")
+#pragma push_macro("uint64_t")
+#pragma push_macro("CHAR_BIT")
+#pragma push_macro("INT_MAX")
+#define NULL (void *)0
+#define uint32_t __UINT32_TYPE__
+#define uint64_t __UINT64_TYPE__
+#define CHAR_BIT __CHAR_BIT__
+#define INT_MAX __INT_MAX__
+#endif // __HIPCC_RTC__
 
 #define __host__ __attribute__((host))
 #define __device__ __attribute__((device))
@@ -54,6 +72,7 @@ static inline __device__ void *free(void *__ptr) {
 #include <__clang_hip_libdevice_declares.h>
 #include <__clang_hip_math.h>
 
+#if !defined(__HIPCC_RTC__)
 #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
 #include <__clang_cuda_math_forward_declares.h>
 #include <__clang_hip_cmath.h>
@@ -62,9 +81,16 @@ static inline __device__ void *free(void *__ptr) {
 #include <algorithm>
 #include <complex>
 #include <new>
+#endif // __HIPCC_RTC__
 #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
 
 #define __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ 1
-
+#if defined(__HIPCC_RTC__)
+#pragma pop_macro("NULL")
+#pragma pop_macro("uint32_t")
+#pragma pop_macro("uint64_t")
+#pragma pop_macro("CHAR_BIT")
+#pragma pop_macro("INT_MAX")
+#endif // __HIPCC_RTC__
 #endif // __HIP__
 #endif // __CLANG_HIP_RUNTIME_WRAPPER_H__
diff --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip
new file mode 100644
index 000000000000..943254b63f2e
--- /dev/null
+++ b/clang/test/Headers/hip-header.hip
@@ -0,0 +1,20 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -D__HIPCC_RTC__ | FileCheck %s
+
+// expected-no-diagnostics
+
+// CHECK-LABEL: amdgpu_kernel void @_Z4kernPff
+__global__ void kern(float *x, float y) {
+  *x = sin(y);
+}
+
+// CHECK-LABEL: define{{.*}} i64 @_Z11test_size_tv
+// CHECK: ret i64 8
+__device__ size_t test_size_t() {
+  return sizeof(size_t);
+}
diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py
index 21b674539a30..63ba8160c661 100644
--- a/clang/test/lit.cfg.py
+++ b/clang/test/lit.cfg.py
@@ -25,7 +25,7 @@
 config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
 
 # suffixes: A list of file extensions to treat as test files.
-config.suffixes = ['.c', '.cpp', '.i', '.cppm', '.m', '.mm', '.cu',
+config.suffixes = ['.c', '.cpp', '.i', '.cppm', '.m', '.mm', '.cu', '.hip',
                    '.ll', '.cl', '.clcpp', '.s', '.S', '.modulemap', '.test', '.rs', '.ifs']
 
 # excludes: A list of directories to exclude from the testsuite. The 'Inputs'

From f8f60297d7724d3fe6144320b3fdb0f47129aaa2 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Sat, 17 Apr 2021 18:01:32 +0200
Subject: [PATCH 34/52] [libcxx][doc] Fixes typos.

---
 libcxx/docs/OneRangesProposalStatus.csv | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libcxx/docs/OneRangesProposalStatus.csv b/libcxx/docs/OneRangesProposalStatus.csv
index e99d56cbe9ae..81c940fdcf72 100644
--- a/libcxx/docs/OneRangesProposalStatus.csv
+++ b/libcxx/docs/OneRangesProposalStatus.csv
@@ -1,4 +1,4 @@
-Section,Description,Dependencies,Asignee,Patch,Complete
+Section,Description,Dependencies,Assignee,Patch,Complete
 [concepts],"Normally this would be more fine-grained, but it’s already been implemented.",,Christopher Di Bella,,✅
 [tuple.helper],Update <tuple> includes.,,,,
 [function.objects],"Comparison functions: equal_to, less, etc.",[concepts],Zoe Carver,D100429,
@@ -19,15 +19,15 @@ forward_iterator: D100275
 bidirectional_iterator: D100278",
 [indirectcallable.indirectinvocable],"indirectly_unary_invocable, indirectly_regular_unary_invocable, indirectly_unary_predicate, indirectly_binary_predicate, indirectly_equivalence_relation, and indirectly_strict_weak_order.","[concepts], [readable.traits]: iter_value_t, [iterator.traits]",,,
 [projected],,[iterator.concepts],,,
-[common.alg.req]: pt. 1,"indirectly_movable, indirectly_movable_sorable, indirectly_copyable, and indirectly_copyable_storable.",[iterator.concepts],,,
+[common.alg.req]: pt. 1,"indirectly_movable, indirectly_movable_storable, indirectly_copyable, and indirectly_copyable_storable.",[iterator.concepts],,,
 [common.alg.req]: pt. 2,indirectly_swappable,"[iterator.concepts], [iterator.cust.swap]",,,
 [common.alg.req]: pt. 3,indirectly_comparable,[projected],,,
 [common.alg.req]: pt. 4,"Note: could be done with pt. 1.           …                              permutable, mergeable, and sortable",[iterator.concepts],,,
 [std.iterator.tags],,[iterator.traits],,,
-[range.iterator.opearations.advance],ranges::advance,[iterator.concepts],,,
-[range.iterator.opearations.distance],ranges::distance,"[iterator.concepts], [range.range]",,,
-[range.iterator.opearations.next],ranges::next,[iterator.concepts],,,
-[range.iterator.opearations.prev],ranges::prev,[iterator.concepts],,,
+[range.iterator.operations.advance],ranges::advance,[iterator.concepts],,,
+[range.iterator.operations.distance],ranges::distance,"[iterator.concepts], [range.range]",,,
+[range.iterator.operations.next],ranges::next,[iterator.concepts],,,
+[range.iterator.operations.prev],ranges::prev,[iterator.concepts],,,
 [predef.iterators],Updates to predefined iterators.,"[iterator.concepts], [iterator.cust.swap], [iterator.cust.move]",,,
 [move.sentinel],,[concepts] … Note: for testing it may be beneficial to have completed [predef.iterators]. ,,,
 [common.iterator],,"[iterator.concepts], [iterator.cust.swap], [iterator.cust.move]",,,
@@ -44,7 +44,7 @@ bidirectional_iterator: D100278",
 [range.refinements],"OutputRange, InputRange, ForwardRange, BidirectionalRange, RandomAccessRange, ContiguousRange, CommonRange, ViewableRange","[ranges.syn]: pt. 2, [range.range]",Christopher Di Bella,"input_range: D100271
 forward_range: D100275
 bidirectional_range: D100278",
-[view.interface],[range.utility.helpers] and view_interface,"[ranges.syn]: pt. 2, [range.view], [range.iterator.opearations.prev], [range.refinements]",,,
+[view.interface],[range.utility.helpers] and view_interface,"[ranges.syn]: pt. 2, [range.view], [range.iterator.operations.prev], [range.refinements]",,,
 [range.subrange],,[view.interface],,,
 [range.all],view::all,"[range.subrange], [range.view.ref]",,,
 [range.view.ref],ref-view,[view.interface],,,

From bbf01f96b5ccc1dcb4d1d47cb55292c27c698dbb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sat, 17 Apr 2021 15:22:54 +0100
Subject: [PATCH 35/52] [ADT] Take graph as const & in some post-order
 iterators (NFC).

This patch updates a couple of functions that unnecessarily took the
input graph by value, when it was not needed. They can take the graph by
const-reference instead, which does not require GraphT to provide a copy
constructor.

Split off from D100169.
---
 llvm/include/llvm/ADT/PostOrderIterator.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h
index 9586a8f3c8ee..3ab76d7cf740 100644
--- a/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -140,15 +140,15 @@ class po_iterator : public po_iterator_storage<SetType, ExtStorage> {
 
 public:
   // Provide static "constructors"...
-  static po_iterator begin(GraphT G) {
+  static po_iterator begin(const GraphT &G) {
     return po_iterator(GT::getEntryNode(G));
   }
-  static po_iterator end(GraphT G) { return po_iterator(); }
+  static po_iterator end(const GraphT &G) { return po_iterator(); }
 
-  static po_iterator begin(GraphT G, SetType &S) {
+  static po_iterator begin(const GraphT &G, SetType &S) {
     return po_iterator(GT::getEntryNode(G), S);
   }
-  static po_iterator end(GraphT G, SetType &S) { return po_iterator(S); }
+  static po_iterator end(const GraphT &G, SetType &S) { return po_iterator(S); }
 
   bool operator==(const po_iterator &x) const {
     return VisitStack == x.VisitStack;

From ebc6608fb79057eaed27435d62d5dea0979bd9d3 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Fri, 16 Apr 2021 22:29:06 +0100
Subject: [PATCH 36/52] [AST] Remove args from LocationCall

This class initially had args to be generic to future needs. In
particular, I thought that source location introspection should show the
getBeginLoc of CallExpr args and the getArgLoc of
TemplateSpecializationLocInfo etc.  However, that is probably best left
out of source location introspection because it involves node traversal.

If something like this is needed in the future, it can be added in the
future.

Differential Revision: https://reviews.llvm.org/D100688
---
 clang/include/clang/Tooling/NodeIntrospection.h     |  6 ------
 clang/lib/Tooling/NodeIntrospection.cpp             | 11 +----------
 clang/unittests/Introspection/IntrospectionTest.cpp | 11 +----------
 3 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/clang/include/clang/Tooling/NodeIntrospection.h b/clang/include/clang/Tooling/NodeIntrospection.h
index 5489a67efa22..c8518ea63546 100644
--- a/clang/include/clang/Tooling/NodeIntrospection.h
+++ b/clang/include/clang/Tooling/NodeIntrospection.h
@@ -38,14 +38,9 @@ class LocationCall : public llvm::ThreadSafeRefCountedBase<LocationCall> {
   LocationCall(SharedLocationCall on, std::string name,
                LocationCallFlags flags = NoFlags)
       : m_flags(flags), m_on(std::move(on)), m_name(std::move(name)) {}
-  LocationCall(SharedLocationCall on, std::string name,
-               std::vector<std::string> args, LocationCallFlags flags = NoFlags)
-      : m_flags(flags), m_on(std::move(on)), m_name(std::move(name)),
-        m_args(std::move(args)) {}
 
   LocationCall *on() const { return m_on.get(); }
   StringRef name() const { return m_name; }
-  ArrayRef<std::string> args() const { return m_args; }
   bool returnsPointer() const { return m_flags & ReturnsPointer; }
   bool isCast() const { return m_flags & IsCast; }
 
@@ -53,7 +48,6 @@ class LocationCall : public llvm::ThreadSafeRefCountedBase<LocationCall> {
   LocationCallFlags m_flags;
   SharedLocationCall m_on;
   std::string m_name;
-  std::vector<std::string> m_args;
 };
 
 class LocationCallFormatterCpp {
diff --git a/clang/lib/Tooling/NodeIntrospection.cpp b/clang/lib/Tooling/NodeIntrospection.cpp
index 0e3ef3c6a01e..6a8d7267f8ae 100644
--- a/clang/lib/Tooling/NodeIntrospection.cpp
+++ b/clang/lib/Tooling/NodeIntrospection.cpp
@@ -29,16 +29,7 @@ void LocationCallFormatterCpp::print(const LocationCall &Call,
       OS << '.';
   }
 
-  OS << Call.name();
-  if (Call.args().empty()) {
-    OS << "()";
-    return;
-  }
-  OS << '(' << Call.args().front();
-  for (const std::string &Arg : Call.args().drop_front()) {
-    OS << ", " << Arg;
-  }
-  OS << ')';
+  OS << Call.name() << "()";
 }
 
 std::string LocationCallFormatterCpp::format(const LocationCall &Call) {
diff --git a/clang/unittests/Introspection/IntrospectionTest.cpp b/clang/unittests/Introspection/IntrospectionTest.cpp
index ad21748f11f8..e56963aa41a6 100644
--- a/clang/unittests/Introspection/IntrospectionTest.cpp
+++ b/clang/unittests/Introspection/IntrospectionTest.cpp
@@ -61,16 +61,7 @@ class LocationCallFormatterSimple {
       print(*On, OS);
       OS << '.';
     }
-    OS << Call.name();
-    if (Call.args().empty()) {
-      OS << "()";
-      return;
-    }
-    OS << '(' << Call.args().front();
-    for (const std::string &Arg : Call.args().drop_front()) {
-      OS << ", " << Arg;
-    }
-    OS << ')';
+    OS << Call.name() << "()";
   }
 
   static std::string format(const LocationCall &Call) {

From 21bef4e11e48d5d4bff7a23babbd420e86dd420a Mon Sep 17 00:00:00 2001
From: Jennifer Chukwu <jnyfaah@gmail.com>
Date: Sat, 17 Apr 2021 20:34:06 +0530
Subject: [PATCH 37/52] [NFC] Fixed Typos

Reviewed By: xgupta

Differential Revision: https://reviews.llvm.org/D100705
---
 clang/docs/UsersManual.rst  | 2 +-
 libcxx/docs/UsingLibcxx.rst | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 9053398c937a..ec521155a7f7 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -686,7 +686,7 @@ generate a reproducer for warnings or errors while using modules.
 .. option:: -gen-reproducer
 
   Generates preprocessed source files, a reproducer script and if relevant, a
-  cache containing: built module pcm's and all headers needed to rebuilt the
+  cache containing: built module pcm's and all headers needed to rebuild the
   same modules.
 
 .. _rpass:
diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst
index 0e6d92bbbcd7..f7de6f64c10d 100644
--- a/libcxx/docs/UsingLibcxx.rst
+++ b/libcxx/docs/UsingLibcxx.rst
@@ -114,10 +114,10 @@ Using libc++ with GCC
 GCC does not provide a way to switch from libstdc++ to libc++. You must manually
 configure the compile and link commands.
 
-In particular you must tell GCC to remove the libstdc++ include directories
+In particular, you must tell GCC to remove the libstdc++ include directories
 using ``-nostdinc++`` and to not link libstdc++.so using ``-nodefaultlibs``.
 
-Note that ``-nodefaultlibs`` removes all of the standard system libraries and
+Note that ``-nodefaultlibs`` removes all the standard system libraries and
 not just libstdc++ so they must be manually linked. For example:
 
 .. code-block:: bash
@@ -151,7 +151,7 @@ thread safety annotations.
 
 **_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS**:
   This macro is used to enable -Wthread-safety annotations on libc++'s
-  ``std::mutex`` and ``std::lock_guard``. By default these annotations are
+  ``std::mutex`` and ``std::lock_guard``. By default, these annotations are
   disabled and must be manually enabled by the user.
 
 **_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS**:

From a5e579cc2b8db6f088b7401623ad35d6c702c553 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sat, 17 Apr 2021 10:47:04 -0700
Subject: [PATCH 38/52] [TableGen] Remove local SmallSet from
 TypeSetByHwMode::insert.

This keeps track of which modes are in VVT so we can find out
if a mode is missing later. But we can just ask VVT whether it
has a particular mode.
---
 llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 4ab2f1c69911..72fe9faf81f8 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -111,10 +111,8 @@ bool TypeSetByHwMode::insert(const ValueTypeByHwMode &VVT) {
   bool ContainsDefault = false;
   MVT DT = MVT::Other;
 
-  SmallSet<unsigned, 4> Modes;
   for (const auto &P : VVT) {
     unsigned M = P.first;
-    Modes.insert(M);
     // Make sure there exists a set for each specific mode from VVT.
     Changed |= getOrCreate(M).insert(P.second).second;
     // Cache VVT's default mode.
@@ -128,7 +126,7 @@ bool TypeSetByHwMode::insert(const ValueTypeByHwMode &VVT) {
   // modes in "this" that do not exist in VVT.
   if (ContainsDefault)
     for (auto &I : *this)
-      if (!Modes.count(I.first))
+      if (!VVT.hasMode(I.first))
         Changed |= I.second.insert(DT).second;
 
   return Changed;

From 141945f950e2f3fd58bc6db3afb5d3b10cb2b0c9 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Sat, 17 Apr 2021 18:58:05 +0100
Subject: [PATCH 39/52] [AST] Enable AST node introspection on WIN32

---
 clang/lib/Tooling/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt
index 0da3dbd0b927..e90b681e16f4 100644
--- a/clang/lib/Tooling/CMakeLists.txt
+++ b/clang/lib/Tooling/CMakeLists.txt
@@ -24,7 +24,6 @@ string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} ${PATH_LIB_END} -1 PATH_TAIL)
 string(CONCAT BINARY_INCLUDE_DIR ${PATH_HEAD} "/include/clang/" ${PATH_TAIL})
 
 if (NOT Python3_EXECUTABLE
-    OR WIN32
     OR APPLE
     OR GENERATOR_IS_MULTI_CONFIG
     OR NOT LLVM_NATIVE_ARCH IN_LIST LLVM_TARGETS_TO_BUILD

From 7b75a3a8ebf04b321e07f17aac5bfe9cb7c656d1 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Sat, 17 Apr 2021 13:56:23 -0400
Subject: [PATCH 40/52] [Support] ThreadPool tests: silence warning unused
 variable 'It'

---
 llvm/unittests/Support/ThreadPool.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp
index 30a8924d16c4..a560d5069bff 100644
--- a/llvm/unittests/Support/ThreadPool.cpp
+++ b/llvm/unittests/Support/ThreadPool.cpp
@@ -246,8 +246,10 @@ TEST_F(ThreadPoolTest, AffinityMask) {
     // Ensure the threads only ran on CPUs 0-3.
     // NOTE: Don't use ASSERT* here because this runs in a subprocess,
     // and will show up as un-executed in the parent.
-    for (auto &It : ThreadsUsed)
-      assert(It.getData().front() < 16UL);
+    assert(llvm::all_of(ThreadsUsed,
+                        [](auto &T) { return T.getData().front() < 16UL; }) &&
+           "Threads ran on more CPUs than expected! The affinity mask does not "
+           "seem to work.");
     return;
   }
   std::string Executable =

From 488a19d00cbaec479f8c5c298556d2246978f9e6 Mon Sep 17 00:00:00 2001
From: Sylvain Audi <sylvain.audi@ubisoft.com>
Date: Fri, 16 Apr 2021 21:45:42 -0400
Subject: [PATCH 41/52] [clang-scan-deps] Support double-dashes in clang
 command lines

This fixes argument injection in clang command lines by inserting the injected arguments before "--".

Previously, the arguments were injected at the end of the command line, so they could land after "--", where they would be wrongly interpreted as input file paths.

This fix is needed for a subsequent patch, see D92191.

Differential Revision: https://reviews.llvm.org/D95099
---
 .../ClangScanDeps/Inputs/regular_cdb.json     |  2 +-
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 19 ++++++++++---------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/clang/test/ClangScanDeps/Inputs/regular_cdb.json b/clang/test/ClangScanDeps/Inputs/regular_cdb.json
index 902c0b7761fb..938880c1304f 100644
--- a/clang/test/ClangScanDeps/Inputs/regular_cdb.json
+++ b/clang/test/ClangScanDeps/Inputs/regular_cdb.json
@@ -11,7 +11,7 @@
 },
 {
   "directory": "DIR",
-  "command": "clang -E DIR/regular_cdb_input.cpp -IInputs -o adena.o",
+  "command": "clang -E -IInputs -o adena.o -- DIR/regular_cdb_input.cpp",
   "file": "DIR/regular_cdb_input.cpp"
 }
 ]
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index a8ff42ab104c..e3ea098d8211 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -418,14 +418,15 @@ int main(int argc, const char **argv) {
         bool HasMQ = false;
         bool HasMD = false;
         bool HasResourceDir = false;
-        // We need to find the last -o value.
-        if (!Args.empty()) {
-          std::size_t Idx = Args.size() - 1;
-          for (auto It = Args.rbegin(); It != Args.rend(); ++It) {
-            StringRef Arg = Args[Idx];
+        auto FlagsEnd = llvm::find(Args, "--");
+        if (FlagsEnd != Args.begin()) {
+          // Reverse scan, starting at the end or at the element before "--".
+          auto R = llvm::make_reverse_iterator(FlagsEnd);
+          for (auto I = R, E = Args.rend(); I != E; ++I) {
+            StringRef Arg = *I;
             if (LastO.empty()) {
-              if (Arg == "-o" && It != Args.rbegin())
-                LastO = Args[Idx + 1];
+              if (Arg == "-o" && I != R)
+                LastO = I[-1]; // Next argument (reverse iterator)
               else if (Arg.startswith("-o"))
                 LastO = Arg.drop_front(2).str();
             }
@@ -437,12 +438,11 @@ int main(int argc, const char **argv) {
               HasMD = true;
             if (Arg == "-resource-dir")
               HasResourceDir = true;
-            --Idx;
           }
         }
         // If there's no -MT/-MQ Driver would add -MT with the value of the last
         // -o option.
-        tooling::CommandLineArguments AdjustedArgs = Args;
+        tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd);
         AdjustedArgs.push_back("-o");
         AdjustedArgs.push_back("/dev/null");
         if (!HasMT && !HasMQ) {
@@ -472,6 +472,7 @@ int main(int argc, const char **argv) {
             AdjustedArgs.push_back(std::string(ResourceDir));
           }
         }
+        AdjustedArgs.insert(AdjustedArgs.end(), FlagsEnd, Args.end());
         return AdjustedArgs;
       });
   AdjustingCompilations->appendArgumentsAdjuster(

From bb26fa8c286bf524ed9235c3e293ad22ecf3e984 Mon Sep 17 00:00:00 2001
From: Sylvain Audi <sylvain.audi@ubisoft.com>
Date: Sat, 17 Apr 2021 12:49:40 -0400
Subject: [PATCH 42/52] [clang-scan-deps] Add support for clang-cl

clang-scan-deps contains some command line parsing and modifications.
This patch adds support for clang-cl command options.

Differential Revision: https://reviews.llvm.org/D92191
---
 .../Inputs/has_include_if_elif.json           |  5 ++
 .../Inputs/header_stat_before_open_cdb.json   |  5 ++
 .../Inputs/headerwithdirname.json             |  5 ++
 .../headerwithdirnamefollowedbyinclude.json   |  5 ++
 .../Inputs/modules_cdb_clangcl.json           | 22 +++++
 .../test/ClangScanDeps/Inputs/no-werror.json  |  5 ++
 .../Inputs/regular_cdb_clangcl.json           | 17 ++++
 .../Inputs/static-analyzer-cdb.json           |  9 +-
 .../Inputs/strip_diag_serialize.json          |  5 ++
 .../Inputs/target-filename-cdb.json           | 45 ++++++++++
 .../ClangScanDeps/Inputs/vfsoverlay_cdb.json  |  5 ++
 clang/test/ClangScanDeps/error.cpp            |  5 ++
 .../ClangScanDeps/has_include_if_elif.cpp     |  7 ++
 .../ClangScanDeps/header_stat_before_open.m   |  6 ++
 .../test/ClangScanDeps/headerwithdirname.cpp  |  5 ++
 .../headerwithdirnamefollowedbyinclude.cpp    |  5 ++
 clang/test/ClangScanDeps/modules-full.cpp     | 18 ++--
 clang/test/ClangScanDeps/modules.cpp          | 13 +++
 clang/test/ClangScanDeps/no-werror.cpp        |  4 +
 clang/test/ClangScanDeps/regular_cdb.cpp      | 23 +++++
 clang/test/ClangScanDeps/static-analyzer.c    |  9 +-
 .../ClangScanDeps/strip_diag_serialize.cpp    |  4 +-
 clang/test/ClangScanDeps/target-filename.cpp  | 27 ++++++
 clang/test/ClangScanDeps/vfsoverlay.cpp       |  5 ++
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 86 ++++++++++++++-----
 25 files changed, 314 insertions(+), 31 deletions(-)
 create mode 100644 clang/test/ClangScanDeps/Inputs/modules_cdb_clangcl.json
 create mode 100644 clang/test/ClangScanDeps/Inputs/regular_cdb_clangcl.json

diff --git a/clang/test/ClangScanDeps/Inputs/has_include_if_elif.json b/clang/test/ClangScanDeps/Inputs/has_include_if_elif.json
index 36ca006b0329..8fcc7ea34a9b 100644
--- a/clang/test/ClangScanDeps/Inputs/has_include_if_elif.json
+++ b/clang/test/ClangScanDeps/Inputs/has_include_if_elif.json
@@ -3,5 +3,10 @@
   "directory": "DIR",
   "command": "clang -E DIR/has_include_if_elif2.cpp -IInputs",
   "file": "DIR/has_include_if_elif2.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs -- DIR/has_include_if_elif2_clangcl.cpp",
+  "file": "DIR/has_include_if_elif2_clangcl.cpp"
 }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json b/clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
index c5f027e9fd28..b99b541b1298 100644
--- a/clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
+++ b/clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
@@ -3,5 +3,10 @@
   "directory": "DIR",
   "command": "clang -E DIR/header_stat_before_open_input.m -iframework Inputs/frameworks",
   "file": "DIR/header_stat_before_open_input.m"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E -Xclang -iframework -Xclang Inputs/frameworks -- DIR/header_stat_before_open_input_clangcl.m",
+  "file": "DIR/header_stat_before_open_input_clangcl.m"
 }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/headerwithdirname.json b/clang/test/ClangScanDeps/Inputs/headerwithdirname.json
index 2ae561935bec..ac12c92308fd 100644
--- a/clang/test/ClangScanDeps/Inputs/headerwithdirname.json
+++ b/clang/test/ClangScanDeps/Inputs/headerwithdirname.json
@@ -3,5 +3,10 @@
       "directory": "DIR",
       "command": "clang -c -IDIR -IDIR/foodir -IInputs DIR/headerwithdirname_input.cpp",
       "file": "DIR/headerwithdirname_input.cpp"
+    },
+    {
+      "directory": "DIR",
+      "command": "clang-cl /c /IDIR /IDIR/foodir -IInputs -- DIR/headerwithdirname_input_clangcl.cpp",
+      "file": "DIR/headerwithdirname_input_clangcl.cpp"
     }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/headerwithdirnamefollowedbyinclude.json b/clang/test/ClangScanDeps/Inputs/headerwithdirnamefollowedbyinclude.json
index de7759d0b110..1886328a9c3e 100644
--- a/clang/test/ClangScanDeps/Inputs/headerwithdirnamefollowedbyinclude.json
+++ b/clang/test/ClangScanDeps/Inputs/headerwithdirnamefollowedbyinclude.json
@@ -3,5 +3,10 @@
       "directory": "DIR",
       "command": "clang -c -IDIR -IInputs DIR/headerwithdirname_input.cpp",
       "file": "DIR/headerwithdirname_input.cpp"
+    },
+    {
+      "directory": "DIR",
+      "command": "clang-cl /c /IDIR /IInputs -- DIR/headerwithdirname_input_clangcl.cpp",
+      "file": "DIR/headerwithdirname_input_clangcl.cpp"
     }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/modules_cdb_clangcl.json b/clang/test/ClangScanDeps/Inputs/modules_cdb_clangcl.json
new file mode 100644
index 000000000000..a1f12867c45d
--- /dev/null
+++ b/clang/test/ClangScanDeps/Inputs/modules_cdb_clangcl.json
@@ -0,0 +1,22 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs /D INCLUDE_HEADER2 /clang:-MD /clang:-MF /clang:DIR/modules_cdb2_clangcl.d /clang:-fmodules /clang:-fcxx-modules /clang:-fmodules-cache-path=DIR/module-cache_clangcl /clang:-fimplicit-modules /clang:-fimplicit-module-maps -- DIR/modules_cdb_input2.cpp",
+  "file": "DIR/modules_cdb_input2.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs /clang:-fmodules /clang:-fcxx-modules /clang:-fmodules-cache-path=DIR/module-cache_clangcl /clang:-fimplicit-modules /clang:-fimplicit-module-maps -- DIR/modules_cdb_input.cpp",
+  "file": "DIR/modules_cdb_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs /clang:-fmodules /clang:-fcxx-modules /clang:-fmodules-cache-path=DIR/module-cache_clangcl /clang:-fimplicit-modules /clang:-fimplicit-module-maps -o a.o -- DIR/modules_cdb_input.cpp",
+  "file": "DIR/modules_cdb_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs /clang:-fmodules /clang:-fcxx-modules /clang:-fmodules-cache-path=DIR/module-cache_clangcl /clang:-fimplicit-modules /clang:-fimplicit-module-maps -o b.o -- DIR/modules_cdb_input.cpp",
+  "file": "DIR/modules_cdb_input.cpp"
+}
+]
diff --git a/clang/test/ClangScanDeps/Inputs/no-werror.json b/clang/test/ClangScanDeps/Inputs/no-werror.json
index 2d1f46cf3af2..7438b670e853 100644
--- a/clang/test/ClangScanDeps/Inputs/no-werror.json
+++ b/clang/test/ClangScanDeps/Inputs/no-werror.json
@@ -3,5 +3,10 @@
   "directory": "DIR",
   "command": "clang -E DIR/no-werror_input.cpp -IInputs -std=c++17 -Weverything -Werror",
   "file": "DIR/no-werror.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs /std:c++17 -Weverything -Werror -- DIR/no-werror_input_clangcl.cpp",
+  "file": "DIR/no-werror_clangcl.cpp"
 }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/regular_cdb_clangcl.json b/clang/test/ClangScanDeps/Inputs/regular_cdb_clangcl.json
new file mode 100644
index 000000000000..bdb84bc9172f
--- /dev/null
+++ b/clang/test/ClangScanDeps/Inputs/regular_cdb_clangcl.json
@@ -0,0 +1,17 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang --driver-mode=cl /E /IInputs /D INCLUDE_HEADER2 /clang:-MD /clang:-MF /clang:DIR/regular_cdb2_clangcl.d -- DIR/regular_cdb_input2.cpp",
+  "file": "DIR/regular_cdb_input2.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs -- DIR/regular_cdb_input.cpp",
+  "file": "DIR/regular_cdb_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs /Foadena.o -- DIR/regular_cdb_input.cpp",
+  "file": "DIR/regular_cdb_input.cpp"
+}
+]
diff --git a/clang/test/ClangScanDeps/Inputs/static-analyzer-cdb.json b/clang/test/ClangScanDeps/Inputs/static-analyzer-cdb.json
index a466d87afaa4..a375f457aecb 100644
--- a/clang/test/ClangScanDeps/Inputs/static-analyzer-cdb.json
+++ b/clang/test/ClangScanDeps/Inputs/static-analyzer-cdb.json
@@ -1,7 +1,12 @@
 [
 {
   "directory": "DIR",
-  "command": "clang --analyze DIR/static-analyzer.c",
-  "file": "DIR/static-analyzer.c"
+  "command": "clang --analyze DIR/static-analyzer_clang.c",
+  "file": "DIR/static-analyzer_clang.c"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl --analyze -- DIR/static-analyzer_clangcl.c",
+  "file": "DIR/static-analyzer_clangcl.c"
 }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/strip_diag_serialize.json b/clang/test/ClangScanDeps/Inputs/strip_diag_serialize.json
index a774d95a3b02..7af1acdc378a 100644
--- a/clang/test/ClangScanDeps/Inputs/strip_diag_serialize.json
+++ b/clang/test/ClangScanDeps/Inputs/strip_diag_serialize.json
@@ -3,5 +3,10 @@
   "directory": "DIR",
   "command": "clang -E -fsyntax-only DIR/strip_diag_serialize_input.cpp --serialize-diagnostics /does/not/exist",
   "file": "DIR/strip_diag_serialize_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E --serialize-diagnostics A:/does/not/exist -- DIR/strip_diag_serialize_input_clangcl.cpp",
+  "file": "DIR/strip_diag_serialize_input_clangcl.cpp"
 }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/target-filename-cdb.json b/clang/test/ClangScanDeps/Inputs/target-filename-cdb.json
index af2e43e4e171..90a3c81e57f4 100644
--- a/clang/test/ClangScanDeps/Inputs/target-filename-cdb.json
+++ b/clang/test/ClangScanDeps/Inputs/target-filename-cdb.json
@@ -18,5 +18,50 @@
   "directory": "DIR",
   "command": "clang -E DIR/target-filename_input.cpp -o first.o -o last.o",
   "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E -o clangcl-a.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E -oclangcl-b.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /o clangcl-c.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /oclangcl-d.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /Foclangcl-e.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E -o clangcl-firstf.o -o clangcl-lastf.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /oclangcl-firstg.o /Foclangcl-lastg.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /Foclangcl-firsth.o -o clangcl-midh.o /oclangcl-lasth.o -- DIR/target-filename_input.cpp",
+  "file": "DIR/target-filename_input.cpp"
 }
 ]
diff --git a/clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json b/clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
index 779203bac674..1b501fa7af41 100644
--- a/clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
+++ b/clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
@@ -3,5 +3,10 @@
   "directory": "DIR",
   "command": "clang -E DIR/vfsoverlay_input.cpp -IInputs -ivfsoverlay DIR/vfsoverlay.yaml",
   "file": "DIR/vfsoverlay_input.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang-cl /E /IInputs -Xclang -ivfsoverlay -Xclang DIR/vfsoverlay.yaml -- DIR/vfsoverlay_input_clangcl.cpp",
+  "file": "DIR/vfsoverlay_input_clangcl.cpp"
 }
 ]
diff --git a/clang/test/ClangScanDeps/error.cpp b/clang/test/ClangScanDeps/error.cpp
index e4e052527890..e18bf302af26 100644
--- a/clang/test/ClangScanDeps/error.cpp
+++ b/clang/test/ClangScanDeps/error.cpp
@@ -3,11 +3,16 @@
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/regular_cdb_input.cpp
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb.json > %t.cdb
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb_clangcl.json > %t_clangcl.cdb
 //
 // RUN: not clang-scan-deps -compilation-database %t.cdb -j 1 2>%t.dir/errs
 // RUN: echo EOF >> %t.dir/errs
 // RUN: FileCheck %s --input-file %t.dir/errs
 
+// RUN: not clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 2>%t.dir/errs_clangcl
+// RUN: echo EOF >> %t.dir/errs_clangcl
+// RUN: FileCheck %s --input-file %t.dir/errs_clangcl
+
 #include "missing.h"
 
 // CHECK: Error while scanning dependencies
diff --git a/clang/test/ClangScanDeps/has_include_if_elif.cpp b/clang/test/ClangScanDeps/has_include_if_elif.cpp
index dd56ecac69db..17eda40c1662 100644
--- a/clang/test/ClangScanDeps/has_include_if_elif.cpp
+++ b/clang/test/ClangScanDeps/has_include_if_elif.cpp
@@ -2,6 +2,7 @@
 // RUN: rm -rf %t.cdb
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/has_include_if_elif2.cpp
+// RUN: cp %s %t.dir/has_include_if_elif2_clangcl.cpp
 // RUN: mkdir %t.dir/Inputs
 // RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
 // RUN: cp %S/Inputs/header.h %t.dir/Inputs/header2.h
@@ -36,3 +37,9 @@
 // CHECK-NEXT: Inputs{{/|\\}}header2.h
 // CHECK-NEXT: Inputs{{/|\\}}header3.h
 // CHECK-NEXT: Inputs{{/|\\}}header4.h
+
+// CHECK: has_include_if_elif2_clangcl.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
+// CHECK-NEXT: Inputs{{/|\\}}header2.h
+// CHECK-NEXT: Inputs{{/|\\}}header3.h
+// CHECK-NEXT: Inputs{{/|\\}}header4.h
diff --git a/clang/test/ClangScanDeps/header_stat_before_open.m b/clang/test/ClangScanDeps/header_stat_before_open.m
index f9f7d240f2c1..ce6f58f434ae 100644
--- a/clang/test/ClangScanDeps/header_stat_before_open.m
+++ b/clang/test/ClangScanDeps/header_stat_before_open.m
@@ -2,6 +2,7 @@
 // RUN: rm -rf %t.cdb
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/header_stat_before_open_input.m
+// RUN: cp %s %t.dir/header_stat_before_open_input_clangcl.m
 // RUN: mkdir %t.dir/Inputs
 // RUN: cp -R %S/Inputs/frameworks %t.dir/Inputs/frameworks
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/header_stat_before_open_cdb.json > %t.cdb
@@ -16,3 +17,8 @@
 // CHECK-NEXT: header_stat_before_open_input.m
 // CHECK-NEXT: Inputs{{/|\\}}frameworks{{/|\\}}Framework.framework{{/|\\}}Headers{{/|\\}}Framework.h
 // CHECK-NEXT: Inputs{{/|\\}}frameworks{{/|\\}}Framework.framework{{/|\\}}PrivateHeaders{{/|\\}}PrivateHeader.h
+
+// CHECK: header_stat_before_open_input_clangcl.o
+// CHECK-NEXT: header_stat_before_open_input_clangcl.m
+// CHECK-NEXT: Inputs{{/|\\}}frameworks{{/|\\}}Framework.framework{{/|\\}}Headers{{/|\\}}Framework.h
+// CHECK-NEXT: Inputs{{/|\\}}frameworks{{/|\\}}Framework.framework{{/|\\}}PrivateHeaders{{/|\\}}PrivateHeader.h
diff --git a/clang/test/ClangScanDeps/headerwithdirname.cpp b/clang/test/ClangScanDeps/headerwithdirname.cpp
index b0f60333aa4c..b6c7f796e90e 100644
--- a/clang/test/ClangScanDeps/headerwithdirname.cpp
+++ b/clang/test/ClangScanDeps/headerwithdirname.cpp
@@ -4,6 +4,7 @@
 // RUN: mkdir -p %t.dir
 // RUN: mkdir -p %t.dir/foodir
 // RUN: cp %s %t.dir/headerwithdirname_input.cpp
+// RUN: cp %s %t.dir/headerwithdirname_input_clangcl.cpp
 // RUN: mkdir %t.dir/Inputs
 // RUN: cp %S/Inputs/foodir %t.dir/Inputs/foodir
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/headerwithdirname.json > %t.cdb
@@ -15,3 +16,7 @@
 // CHECK: headerwithdirname_input.o
 // CHECK-NEXT: headerwithdirname_input.cpp
 // CHECK-NEXT: Inputs{{/|\\}}foodir
+
+// CHECK: headerwithdirname_input_clangcl.o
+// CHECK-NEXT: headerwithdirname_input_clangcl.cpp
+// CHECK-NEXT: Inputs{{/|\\}}foodir
diff --git a/clang/test/ClangScanDeps/headerwithdirnamefollowedbyinclude.cpp b/clang/test/ClangScanDeps/headerwithdirnamefollowedbyinclude.cpp
index e8e8a69e82e2..1f9133dc74e8 100644
--- a/clang/test/ClangScanDeps/headerwithdirnamefollowedbyinclude.cpp
+++ b/clang/test/ClangScanDeps/headerwithdirnamefollowedbyinclude.cpp
@@ -7,6 +7,7 @@
 
 // RUN: cp %S/Inputs/header.h %t.dir/foodir/foodirheader.h
 // RUN: cp %s %t.dir/headerwithdirname_input.cpp
+// RUN: cp %s %t.dir/headerwithdirname_input_clangcl.cpp
 // RUN: mkdir %t.dir/Inputs
 // RUN: cp %S/Inputs/foodir %t.dir/Inputs/foodir
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/headerwithdirnamefollowedbyinclude.json > %t.cdb
@@ -19,3 +20,7 @@
 // CHECK: headerwithdirname_input.o
 // CHECK-NEXT: headerwithdirname_input.cpp
 // CHECK-NEXT: Inputs{{/|\\}}foodir
+
+// CHECK: headerwithdirname_input_clangcl.o
+// CHECK-NEXT: headerwithdirname_input_clangcl.cpp
+// CHECK-NEXT: Inputs{{/|\\}}foodir
diff --git a/clang/test/ClangScanDeps/modules-full.cpp b/clang/test/ClangScanDeps/modules-full.cpp
index 1e6a740c2739..db2549670392 100644
--- a/clang/test/ClangScanDeps/modules-full.cpp
+++ b/clang/test/ClangScanDeps/modules-full.cpp
@@ -8,12 +8,18 @@
 // RUN: cp %S/Inputs/header2.h %t.dir/Inputs/header2.h
 // RUN: cp %S/Inputs/module.modulemap %t.dir/Inputs/module.modulemap
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb.json > %t.cdb
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl.json > %t_clangcl.cdb
 //
 // RUN: echo %t.dir > %t.result
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 4 -full-command-line \
 // RUN:   -mode preprocess-minimized-sources -format experimental-full >> %t.result
 // RUN: cat %t.result | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK %s
 
+// RUN: echo %t.dir > %t_clangcl.result
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 4 -full-command-line \
+// RUN:   -mode preprocess-minimized-sources -format experimental-full >> %t_clangcl.result
+// RUN: cat %t_clangcl.result | sed 's/\\/\//g' | FileCheck --check-prefixes=CHECK %s
+
 // FIXME: Backslash issues.
 // XFAIL: system-windows
 
@@ -33,7 +39,7 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-fno-implicit-modules",
 // CHECK-NEXT:         "-fno-implicit-module-maps",
-// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache/[[CONTEXT_HASH_H1]]/header2-{{[A-Z0-9]+}}.pcm",
+// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache{{(_clangcl)?}}/[[CONTEXT_HASH_H1]]/header2-{{[A-Z0-9]+}}.pcm",
 // CHECK-NEXT:         "-fmodule-map-file=[[PREFIX]]/Inputs/module.modulemap"
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "context-hash": "[[CONTEXT_HASH_H1]]",
@@ -84,7 +90,7 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-fno-implicit-modules",
 // CHECK-NEXT:         "-fno-implicit-module-maps",
-// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache/[[CONTEXT_HASH_H2]]/header1-{{[A-Z0-9]+}}.pcm",
+// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache{{(_clangcl)?}}/[[CONTEXT_HASH_H2]]/header1-{{[A-Z0-9]+}}.pcm",
 // CHECK-NEXT:         "-fmodule-map-file=[[PREFIX]]/Inputs/module.modulemap"
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "file-deps": [
@@ -103,7 +109,7 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-fno-implicit-modules",
 // CHECK-NEXT:         "-fno-implicit-module-maps",
-// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache/[[CONTEXT_HASH_H2]]/header1-{{[A-Z0-9]+}}.pcm",
+// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache{{(_clangcl)?}}/[[CONTEXT_HASH_H2]]/header1-{{[A-Z0-9]+}}.pcm",
 // CHECK-NEXT:         "-fmodule-map-file=[[PREFIX]]/Inputs/module.modulemap"
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "file-deps": [
@@ -122,7 +128,7 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-fno-implicit-modules",
 // CHECK-NEXT:         "-fno-implicit-module-maps",
-// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache/[[CONTEXT_HASH_H2]]/header1-{{[A-Z0-9]+}}.pcm",
+// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache{{(_clangcl)?}}/[[CONTEXT_HASH_H2]]/header1-{{[A-Z0-9]+}}.pcm",
 // CHECK-NEXT:         "-fmodule-map-file=[[PREFIX]]/Inputs/module.modulemap"
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "file-deps": [
@@ -141,9 +147,9 @@
 // CHECK-NEXT:       "command-line": [
 // CHECK-NEXT:         "-fno-implicit-modules",
 // CHECK-NEXT:         "-fno-implicit-module-maps",
-// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache/[[CONTEXT_HASH_H1]]/header2-{{[A-Z0-9]+}}.pcm",
+// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache{{(_clangcl)?}}/[[CONTEXT_HASH_H1]]/header2-{{[A-Z0-9]+}}.pcm",
 // CHECK-NEXT:         "-fmodule-map-file=[[PREFIX]]/Inputs/module.modulemap",
-// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache/[[CONTEXT_HASH_H1]]/header1-{{[A-Z0-9]+}}.pcm",
+// CHECK-NEXT:         "-fmodule-file=[[PREFIX]]/module-cache{{(_clangcl)?}}/[[CONTEXT_HASH_H1]]/header1-{{[A-Z0-9]+}}.pcm",
 // CHECK-NEXT:         "-fmodule-map-file=[[PREFIX]]/Inputs/module.modulemap"
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "file-deps": [
diff --git a/clang/test/ClangScanDeps/modules.cpp b/clang/test/ClangScanDeps/modules.cpp
index 599fcd1b4353..f0d97dc0c5c2 100644
--- a/clang/test/ClangScanDeps/modules.cpp
+++ b/clang/test/ClangScanDeps/modules.cpp
@@ -1,6 +1,8 @@
 // RUN: rm -rf %t.dir
 // RUN: rm -rf %t.cdb
+// RUN: rm -rf %t_clangcl.cdb
 // RUN: rm -rf %t.module-cache
+// RUN: rm -rf %t.module-cache_clangcl
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/modules_cdb_input.cpp
 // RUN: cp %s %t.dir/modules_cdb_input2.cpp
@@ -9,9 +11,12 @@
 // RUN: cp %S/Inputs/header2.h %t.dir/Inputs/header2.h
 // RUN: cp %S/Inputs/module.modulemap %t.dir/Inputs/module.modulemap
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb.json > %t.cdb
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/modules_cdb_clangcl.json > %t_clangcl.cdb
 //
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \
 // RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s
 //
 // The output order is non-deterministic when using more than one thread,
 // so check the output using two runs. Note that the 'NOT' check is not used
@@ -20,12 +25,20 @@
 //
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
 // RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
 // RUN:   FileCheck --check-prefix=CHECK2 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK2 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
 
 #include "header.h"
 
diff --git a/clang/test/ClangScanDeps/no-werror.cpp b/clang/test/ClangScanDeps/no-werror.cpp
index 95407c51b3cc..11f1b718dac5 100644
--- a/clang/test/ClangScanDeps/no-werror.cpp
+++ b/clang/test/ClangScanDeps/no-werror.cpp
@@ -2,6 +2,7 @@
 // RUN: rm -rf %t.cdb
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/no-werror_input.cpp
+// RUN: cp %s %t.dir/no-werror_input_clangcl.cpp
 // RUN: mkdir %t.dir/Inputs
 // RUN: cp %S/Inputs/sys-header.h %t.dir/Inputs/sys-header.h
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/no-werror.json > %t.cdb
@@ -14,3 +15,6 @@
 
 // CHECK: no-werror_input.cpp
 // CHECK-NEXT: Inputs{{/|\\}}sys-header.h
+
+// CHECK: no-werror_input_clangcl.cpp
+// CHECK-NEXT: Inputs{{/|\\}}sys-header.h
diff --git a/clang/test/ClangScanDeps/regular_cdb.cpp b/clang/test/ClangScanDeps/regular_cdb.cpp
index 8fb94350e4c2..d7ba2519067e 100644
--- a/clang/test/ClangScanDeps/regular_cdb.cpp
+++ b/clang/test/ClangScanDeps/regular_cdb.cpp
@@ -1,5 +1,6 @@
 // RUN: rm -rf %t.dir
 // RUN: rm -rf %t.cdb
+// RUN: rm -rf %t_clangcl.cdb
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/regular_cdb_input.cpp
 // RUN: cp %s %t.dir/regular_cdb_input2.cpp
@@ -7,17 +8,28 @@
 // RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
 // RUN: cp %S/Inputs/header2.h %t.dir/Inputs/header2.h
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb.json > %t.cdb
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb_clangcl.json > %t_clangcl.cdb
 //
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \
 // RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s
+
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \
 // RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess | \
+// RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s
+
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources \
 // RUN:   -skip-excluded-pp-ranges=0 | FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 1 -mode preprocess-minimized-sources \
+// RUN:   -skip-excluded-pp-ranges=0 | FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO,CHECK3 %s
 //
 // Make sure we didn't produce any dependency files!
 // RUN: not cat %t.dir/regular_cdb.d
+// RUN: not cat %t.dir/regular_cdb_clangcl.d
 // RUN: not cat %t.dir/regular_cdb2.d
+// RUN: not cat %t.dir/regular_cdb2_clangcl.d
 //
 // The output order is non-deterministic when using more than one thread,
 // so check the output using two runs. Note that the 'NOT' check is not used
@@ -26,12 +38,23 @@
 //
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
 // RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
+
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
+
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
 // RUN:   FileCheck --check-prefix=CHECK2 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
+
 // RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK2 %s
+// RUN: clang-scan-deps -compilation-database %t_clangcl.cdb -j 2 -mode preprocess | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
 
 #include "header.h"
 
diff --git a/clang/test/ClangScanDeps/static-analyzer.c b/clang/test/ClangScanDeps/static-analyzer.c
index c4af9b076bb2..8edbf3eaae20 100644
--- a/clang/test/ClangScanDeps/static-analyzer.c
+++ b/clang/test/ClangScanDeps/static-analyzer.c
@@ -1,7 +1,9 @@
 // RUN: rm -rf %t.dir
 // RUN: rm -rf %t-cdb.json
 // RUN: mkdir -p %t.dir
-// RUN: cp %s %t.dir/static-analyzer.c
+// Change file name to avoid false positives in CHECK, since "static-analyzer.c" is found in %S.
+// RUN: cp %s %t.dir/static-analyzer_clang.c
+// RUN: cp %s %t.dir/static-analyzer_clangcl.c
 // RUN: mkdir %t.dir/Inputs
 // RUN: cp %S/Inputs/header.h %t.dir/Inputs/analyze_header_input.h
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/static-analyzer-cdb.json > %t-cdb.json
@@ -12,5 +14,8 @@
 #include "Inputs/analyze_header_input.h"
 #endif
 
-// CHECK: analyze_header_input.h
+// CHECK: static-analyzer_clang.c
+// CHECK-NEXT: analyze_header_input.h
 
+// CHECK: static-analyzer_clangcl.c
+// CHECK-NEXT: analyze_header_input.h
diff --git a/clang/test/ClangScanDeps/strip_diag_serialize.cpp b/clang/test/ClangScanDeps/strip_diag_serialize.cpp
index ec62e7513481..d9f758882027 100644
--- a/clang/test/ClangScanDeps/strip_diag_serialize.cpp
+++ b/clang/test/ClangScanDeps/strip_diag_serialize.cpp
@@ -2,10 +2,12 @@
 // RUN: rm -rf %t.cdb
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/strip_diag_serialize_input.cpp
+// RUN: cp %s %t.dir/strip_diag_serialize_input_clangcl.cpp
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/strip_diag_serialize.json > %t.cdb
 //
-// RUN: clang-scan-deps -compilation-database %t.cdb 2>&1 | FileCheck %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 2>&1 | FileCheck %s
 // CHECK-NOT: unable to open file
 // CHECK: strip_diag_serialize_input.cpp
+// CHECK: strip_diag_serialize_input_clangcl.cpp
 
 #warning "diagnostic"
diff --git a/clang/test/ClangScanDeps/target-filename.cpp b/clang/test/ClangScanDeps/target-filename.cpp
index c47166b21e4e..02084ee9b372 100644
--- a/clang/test/ClangScanDeps/target-filename.cpp
+++ b/clang/test/ClangScanDeps/target-filename.cpp
@@ -18,3 +18,30 @@
 
 // CHECK-NEXT: last.o:
 // CHECK-NEXT: target-filename_input.cpp
+
+// CHECK: target-filename_input.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-a.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-b.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-c.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-d.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-e.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-lastf.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-lastg.o:
+// CHECK-NEXT: target-filename_input.cpp
+
+// CHECK-NEXT: clangcl-lasth.o:
+// CHECK-NEXT: target-filename_input.cpp
diff --git a/clang/test/ClangScanDeps/vfsoverlay.cpp b/clang/test/ClangScanDeps/vfsoverlay.cpp
index 517738943ab5..b3a2b23daad7 100644
--- a/clang/test/ClangScanDeps/vfsoverlay.cpp
+++ b/clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -2,6 +2,7 @@
 // RUN: rm -rf %t.cdb
 // RUN: mkdir -p %t.dir
 // RUN: cp %s %t.dir/vfsoverlay_input.cpp
+// RUN: cp %s %t.dir/vfsoverlay_input_clangcl.cpp
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
 // RUN: mkdir %t.dir/Inputs
 // RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
@@ -15,3 +16,7 @@
 // CHECK: vfsoverlay_input.o
 // CHECK-NEXT: vfsoverlay_input.cpp
 // CHECK-NEXT: Inputs{{/|\\}}header.h
+
+// CHECK: vfsoverlay_input_clangcl.o
+// CHECK-NEXT: vfsoverlay_input_clangcl.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index e3ea098d8211..baae2c615c62 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -12,6 +12,8 @@
 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/InitLLVM.h"
@@ -49,7 +51,8 @@ class ResourceDirectoryCache {
   /// option and cache the results for reuse. \returns resource directory path
   /// associated with the given invocation command or empty string if the
   /// compiler path is NOT an absolute path.
-  StringRef findResourceDir(const tooling::CommandLineArguments &Args) {
+  StringRef findResourceDir(const tooling::CommandLineArguments &Args,
+                            bool ClangCLMode) {
     if (Args.size() < 1)
       return "";
 
@@ -65,8 +68,12 @@ class ResourceDirectoryCache {
     if (CachedResourceDir != Cache.end())
       return CachedResourceDir->second;
 
-    std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName,
-                                                "-print-resource-dir"};
+    std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName};
+    if (ClangCLMode)
+      PrintResourceDirArgs.push_back("/clang:-print-resource-dir");
+    else
+      PrintResourceDirArgs.push_back("-print-resource-dir");
+
     llvm::SmallString<64> OutputFile, ErrorFile;
     llvm::sys::fs::createTemporaryFile("print-resource-dir-output",
                                        "" /*no-suffix*/, OutputFile);
@@ -418,24 +425,52 @@ int main(int argc, const char **argv) {
         bool HasMQ = false;
         bool HasMD = false;
         bool HasResourceDir = false;
+        bool ClangCLMode = false;
         auto FlagsEnd = llvm::find(Args, "--");
         if (FlagsEnd != Args.begin()) {
+          ClangCLMode =
+              llvm::sys::path::stem(Args[0]).contains_lower("clang-cl") ||
+              llvm::is_contained(Args, "--driver-mode=cl");
+
           // Reverse scan, starting at the end or at the element before "--".
           auto R = llvm::make_reverse_iterator(FlagsEnd);
           for (auto I = R, E = Args.rend(); I != E; ++I) {
             StringRef Arg = *I;
-            if (LastO.empty()) {
-              if (Arg == "-o" && I != R)
-                LastO = I[-1]; // Next argument (reverse iterator)
-              else if (Arg.startswith("-o"))
-                LastO = Arg.drop_front(2).str();
+            if (ClangCLMode) {
+              if (LastO.empty()) {
+                // With clang-cl, the output obj file can be specified with
+                // "/opath", "/o path", "/Fopath", and the dash counterparts.
+                // Also, clang-cl adds ".obj" extension if none is found.
+                if ((Arg == "-o" || Arg == "/o") && I != R)
+                  LastO = I[-1]; // Next argument (reverse iterator)
+                else if (Arg.startswith("/Fo") || Arg.startswith("-Fo"))
+                  LastO = Arg.drop_front(3).str();
+                else if (Arg.startswith("/o") || Arg.startswith("-o"))
+                  LastO = Arg.drop_front(2).str();
+
+                if (!LastO.empty() && !llvm::sys::path::has_extension(LastO))
+                  LastO.append(".obj");
+              }
+              if (Arg == "/clang:-MT")
+                HasMT = true;
+              if (Arg == "/clang:-MQ")
+                HasMQ = true;
+              if (Arg == "/clang:-MD")
+                HasMD = true;
+            } else {
+              if (LastO.empty()) {
+                if (Arg == "-o" && I != R)
+                  LastO = I[-1]; // Next argument (reverse iterator)
+                else if (Arg.startswith("-o"))
+                  LastO = Arg.drop_front(2).str();
+              }
+              if (Arg == "-MT")
+                HasMT = true;
+              if (Arg == "-MQ")
+                HasMQ = true;
+              if (Arg == "-MD")
+                HasMD = true;
             }
-            if (Arg == "-MT")
-              HasMT = true;
-            if (Arg == "-MQ")
-              HasMQ = true;
-            if (Arg == "-MD")
-              HasMD = true;
             if (Arg == "-resource-dir")
               HasResourceDir = true;
           }
@@ -444,18 +479,29 @@ int main(int argc, const char **argv) {
         // -o option.
         tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd);
         AdjustedArgs.push_back("-o");
+#ifdef _WIN32
+        AdjustedArgs.push_back("nul");
+#else
         AdjustedArgs.push_back("/dev/null");
+#endif
         if (!HasMT && !HasMQ) {
-          AdjustedArgs.push_back("-M");
-          AdjustedArgs.push_back("-MT");
           // We're interested in source dependencies of an object file.
+          std::string FileNameArg;
           if (!HasMD) {
             // FIXME: We are missing the directory unless the -o value is an
             // absolute path.
-            AdjustedArgs.push_back(!LastO.empty() ? LastO
-                                                  : getObjFilePath(FileName));
+            FileNameArg = !LastO.empty() ? LastO : getObjFilePath(FileName);
+          } else {
+            FileNameArg = std::string(FileName);
+          }
+          if (ClangCLMode) {
+            AdjustedArgs.push_back("/clang:-M");
+            AdjustedArgs.push_back("/clang:-MT");
+            AdjustedArgs.push_back(Twine("/clang:", FileNameArg).str());
           } else {
-            AdjustedArgs.push_back(std::string(FileName));
+            AdjustedArgs.push_back("-M");
+            AdjustedArgs.push_back("-MT");
+            AdjustedArgs.push_back(std::move(FileNameArg));
           }
         }
         AdjustedArgs.push_back("-Xclang");
@@ -466,7 +512,7 @@ int main(int argc, const char **argv) {
 
         if (!HasResourceDir) {
           StringRef ResourceDir =
-              ResourceDirCache.findResourceDir(Args);
+              ResourceDirCache.findResourceDir(Args, ClangCLMode);
           if (!ResourceDir.empty()) {
             AdjustedArgs.push_back("-resource-dir");
             AdjustedArgs.push_back(std::string(ResourceDir));

From 0c6ee502ebfb0ac9b1ba098075c4f2e502e1af80 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Sat, 17 Apr 2021 11:55:14 -0700
Subject: [PATCH 43/52] [JITLink] Add testcase that was accidentally left out
 of 19e402d2b34.

---
 ...ame_section_name_different_segment_names.s | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 llvm/test/ExecutionEngine/JITLink/X86/MachO_same_section_name_different_segment_names.s

diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_same_section_name_different_segment_names.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_same_section_name_different_segment_names.s
new file mode 100644
index 000000000000..e1f750cee7be
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_same_section_name_different_segment_names.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %s
+# RUN: llvm-jitlink -noexec %t
+#
+# Check that JITLink handles MachO sections with the same section name but
+# different segment names.
+
+	.section	__TEXT,__text,regular,pure_instructions
+	.build_version macos, 11, 0	sdk_version 11, 1
+	.globl	_main
+	.p2align	4, 0x90
+_main:                                  ## @main
+	xorl	%eax, %eax
+	retq
+
+        .section	__TEXT,__const
+        .globl _a
+_a:
+        .quad   42
+
+	.section	__DATA,__const
+	.globl	_b
+	.p2align	3
+_b:
+	.quad	42
+
+.subsections_via_symbols

From fe9a5a806e83063d0a39cefded39ab40c8e546ba Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 17 Apr 2021 20:55:57 +0200
Subject: [PATCH 44/52] [LoopUnroll] Make some tests more robust (NFC)

Replace branch on undef by branch on unknown condition.
---
 llvm/test/Transforms/LoopUnroll/pr31718.ll           |  6 +++---
 llvm/test/Transforms/LoopUnroll/runtime-li.ll        |  6 +++---
 .../LoopUnroll/runtime-loop-multiexit-dom-verify.ll  | 12 ++++++------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/test/Transforms/LoopUnroll/pr31718.ll b/llvm/test/Transforms/LoopUnroll/pr31718.ll
index 014ef7e501ec..a06e67ace740 100644
--- a/llvm/test/Transforms/LoopUnroll/pr31718.ll
+++ b/llvm/test/Transforms/LoopUnroll/pr31718.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: {{.*}} = phi i32 [ %d.0, %h3.1 ]
 ; CHECK: br label %exit
 
-define void @main() local_unnamed_addr #0 {
+define void @main(i1 %c) local_unnamed_addr #0 {
 ph1:
   br label %h1
 
@@ -29,7 +29,7 @@ h2:
   br label %h3
 
 h3:
-  br i1 undef, label %latch3, label %exit
+  br i1 %c, label %latch3, label %exit
 
 latch3:
   br i1 false, label %exit3, label %h3
@@ -43,7 +43,7 @@ latch2:
   br i1 %cmp, label %h2, label %exit2
 
 exit2:
-  br i1 undef, label %latch1, label %ph2
+  br i1 %c, label %latch1, label %ph2
 
 latch1:                 ; preds = %exit2
   %1 = load i32, i32* @b, align 4
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-li.ll b/llvm/test/Transforms/LoopUnroll/runtime-li.ll
index a4a9b9202891..cc7150480cb6 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-li.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-li.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: remark: {{.*}}: unrolled loop by a factor of 2 with run-time trip count
 ; CHECK: @widget
 ; CHECK: ret void
-define void @widget(double* %arg, double* %arg1, double* %p, i64* %q1, i64* %q2) local_unnamed_addr {
+define void @widget(double* %arg, double* %arg1, double* %p, i64* %q1, i64* %q2, i1 %c) local_unnamed_addr {
 entry:
   br label %header.outer
 
@@ -19,10 +19,10 @@ header.outer:                                     ; preds = %latch.outer, %entry
 header.inner:                                     ; preds = %latch.inner, %header.outer
   %tmp5 = load i64, i64* %q1, align 8
   %tmp6 = icmp eq double* %p, %arg
-  br i1 undef, label %exiting.inner, label %latch.outer
+  br i1 %c, label %exiting.inner, label %latch.outer
 
 exiting.inner:                                     ; preds = %latch.inner, %header.outer
-  br i1 undef, label %latch.inner, label %latch.outer
+  br i1 %c, label %latch.inner, label %latch.outer
 
 latch.inner:                                      ; preds = %header.inner
   store i64 %tmp5, i64* %q2, align 8
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
index d354d6842c7f..5b8e5ef7fd1f 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
@@ -174,7 +174,7 @@ otherexit:                                           ; preds = %exiting
 
 ; exit block (%exitB) has an exiting block and another exit block as predecessors.
 ; exiting block comes from inner loop.
-define void @test5() {
+define void @test5(i1 %c) {
 ; CHECK-LABEL: test5
 ; CHECK-LABEL: bb1:
 ; CHECK-NEXT:   br i1 false, label %outerH.prol.preheader, label %outerH.prol.loopexit
@@ -198,10 +198,10 @@ outerH:                                              ; preds = %outerLatch, %bb1
   br label %innerH
 
 innerH:                                              ; preds = %innerLatch, %outerH
-  br i1 undef, label %innerexiting, label %otherexitB
+  br i1 %c, label %innerexiting, label %otherexitB
 
 innerexiting:                                             ; preds = %innerH
-  br i1 undef, label %innerLatch, label %exitB
+  br i1 %c, label %innerLatch, label %exitB
 
 innerLatch:                                             ; preds = %innerexiting
   %tmp13 = fcmp olt double undef, 2.000000e+00
@@ -225,7 +225,7 @@ otherexitB:                                              ; preds = %innerH
 
 ; Blocks reachable from exits (not_zero44) have the IDom as the block within the loop (Header).
 ; Update the IDom to the preheader.
-define void @test6() {
+define void @test6(i1 %c) {
 ; CHECK-LABEL: test6
 ; CHECK-LABEL: header.prol.preheader:
 ; CHECK-NEXT:    br label %header.prol
@@ -234,7 +234,7 @@ define void @test6() {
 ; CHECK-NEXT:    %indvars.iv.prol = phi i64 [ undef, %header.prol.preheader ], [ %indvars.iv.next.prol, %latch.prol ]
 ; CHECK-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
 
-; CHECK-NEXT:    br i1 false, label %latch.prol, label %otherexit.loopexit1
+; CHECK-NEXT:    br i1 %c, label %latch.prol, label %otherexit.loopexit1
 
 ; CHECK-LABEL: header.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.prol, %latch.prol ]
@@ -252,7 +252,7 @@ entry:
 
 header:                                          ; preds = %latch, %entry
   %indvars.iv = phi i64 [ undef, %entry ], [ %indvars.iv.next, %latch ]
-  br i1 undef, label %latch, label %otherexit
+  br i1 %c, label %latch, label %otherexit
 
 latch:                                         ; preds = %header
   %indvars.iv.next = add nsw i64 %indvars.iv, 2

From c456ab78ae3aa9791e6137c06f30cc55f45a7cb2 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 17 Apr 2021 20:58:27 +0200
Subject: [PATCH 45/52] [LoopUnroll] Regenerate test checks (NFC)

---
 llvm/test/Transforms/LoopUnroll/scevunroll.ll | 155 ++++++++++++++----
 1 file changed, 124 insertions(+), 31 deletions(-)

diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
index ea473e1ccab6..4d9ae39f721f 100644
--- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info | FileCheck %s
 ;
 ; Unit tests for loop unrolling using ScalarEvolution to compute trip counts.
@@ -6,12 +7,41 @@
 ; tests may check that SCEV is properly invalidated between passes.
 
 ; Completely unroll loops without a canonical IV.
-;
-; CHECK-LABEL: @sansCanonical(
-; CHECK-NOT: phi
-; CHECK-NOT: icmp
-; CHECK: ret
 define i32 @sansCanonical(i32* %base) nounwind {
+; CHECK-LABEL: @sansCanonical(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[ADR:%.*]] = getelementptr inbounds i32, i32* [[BASE:%.*]], i64 9
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[ADR]], align 8
+; CHECK-NEXT:    [[ADR_1:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 8
+; CHECK-NEXT:    [[TMP_1:%.*]] = load i32, i32* [[ADR_1]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_1:%.*]] = add i32 [[TMP]], [[TMP_1]]
+; CHECK-NEXT:    [[ADR_2:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 7
+; CHECK-NEXT:    [[TMP_2:%.*]] = load i32, i32* [[ADR_2]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_2:%.*]] = add i32 [[SUM_NEXT_1]], [[TMP_2]]
+; CHECK-NEXT:    [[ADR_3:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 6
+; CHECK-NEXT:    [[TMP_3:%.*]] = load i32, i32* [[ADR_3]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_3:%.*]] = add i32 [[SUM_NEXT_2]], [[TMP_3]]
+; CHECK-NEXT:    [[ADR_4:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 5
+; CHECK-NEXT:    [[TMP_4:%.*]] = load i32, i32* [[ADR_4]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_4:%.*]] = add i32 [[SUM_NEXT_3]], [[TMP_4]]
+; CHECK-NEXT:    [[ADR_5:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 4
+; CHECK-NEXT:    [[TMP_5:%.*]] = load i32, i32* [[ADR_5]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_5:%.*]] = add i32 [[SUM_NEXT_4]], [[TMP_5]]
+; CHECK-NEXT:    [[ADR_6:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 3
+; CHECK-NEXT:    [[TMP_6:%.*]] = load i32, i32* [[ADR_6]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_6:%.*]] = add i32 [[SUM_NEXT_5]], [[TMP_6]]
+; CHECK-NEXT:    [[ADR_7:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 2
+; CHECK-NEXT:    [[TMP_7:%.*]] = load i32, i32* [[ADR_7]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_7:%.*]] = add i32 [[SUM_NEXT_6]], [[TMP_7]]
+; CHECK-NEXT:    [[ADR_8:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 1
+; CHECK-NEXT:    [[TMP_8:%.*]] = load i32, i32* [[ADR_8]], align 8
+; CHECK-NEXT:    [[SUM_NEXT_8:%.*]] = add i32 [[SUM_NEXT_7]], [[TMP_8]]
+; CHECK-NEXT:    [[TMP_9:%.*]] = load i32, i32* [[BASE]], align 8
+; CHECK-NEXT:    ret i32 [[SUM_NEXT_8]]
+;
 entry:
   br label %while.body
 
@@ -34,12 +64,29 @@ exit:
 ; case, the computed trip count based on a canonical IV is *not* for a
 ; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
 ; unrolling does not.
-;
-; CHECK-LABEL: @earlyLoopTest(
-; CHECK: tail:
-; CHECK-NOT: br
-; CHECK: br i1 %cmp2, label %loop, label %exit2
 define i64 @earlyLoopTest(i64* %base) nounwind {
+; CHECK-LABEL: @earlyLoopTest(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TAIL:%.*]] ]
+; CHECK-NEXT:    [[S:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[S_NEXT:%.*]], [[TAIL]] ]
+; CHECK-NEXT:    [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i64, i64* [[ADR]], align 4
+; CHECK-NEXT:    [[S_NEXT]] = add i64 [[S]], [[VAL]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[INC]], 4
+; CHECK-NEXT:    br i1 [[CMP]], label [[TAIL]], label [[EXIT1:%.*]]
+; CHECK:       tail:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0
+; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOP]], label [[EXIT2:%.*]]
+; CHECK:       exit1:
+; CHECK-NEXT:    [[S_LCSSA:%.*]] = phi i64 [ [[S]], [[LOOP]] ]
+; CHECK-NEXT:    ret i64 [[S_LCSSA]]
+; CHECK:       exit2:
+; CHECK-NEXT:    [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[S_NEXT]], [[TAIL]] ]
+; CHECK-NEXT:    ret i64 [[S_NEXT_LCSSA1]]
+;
 entry:
   br label %loop
 
@@ -65,13 +112,24 @@ exit2:
 }
 
 ; SCEV properly unrolls multi-exit loops.
-;
-; CHECK-LABEL: @multiExit(
-; CHECK: getelementptr i32, i32* %base, i32 %iv
-; CHECK-NEXT: load i32, i32*
-; CHECK: br i1 false, label %l2, label %exit1
-; CHECK: br i1 true, label %l1, label %exit2
 define i32 @multiExit(i32* %base) nounwind {
+; CHECK-LABEL: @multiExit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[L1:%.*]]
+; CHECK:       l1:
+; CHECK-NEXT:    [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L2:%.*]] ]
+; CHECK-NEXT:    [[INC1]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-NEXT:    [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADR]], align 4
+; CHECK-NEXT:    br i1 false, label [[L2]], label [[EXIT1:%.*]]
+; CHECK:       l2:
+; CHECK-NEXT:    br i1 true, label [[L1]], label [[EXIT2:%.*]]
+; CHECK:       exit1:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       exit2:
+; CHECK-NEXT:    [[VAL_LCSSA1:%.*]] = phi i32 [ [[VAL]], [[L2]] ]
+; CHECK-NEXT:    ret i32 [[VAL_LCSSA1]]
+;
 entry:
   br label %l1
 l1:
@@ -97,12 +155,29 @@ exit2:
 ; a known trip count, regardless of the early exit trip counts. The
 ; LoopUnroll utility uses this assumption to optimize the latch
 ; block's branch.
-;
-; CHECK-LABEL: @multiExitIncomplete(
-; CHECK: l3:
-; CHECK-NOT: br
-; CHECK:   br i1 %cmp3, label %l1, label %exit3
 define i32 @multiExitIncomplete(i32* %base) nounwind {
+; CHECK-LABEL: @multiExitIncomplete(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[L1:%.*]]
+; CHECK:       l1:
+; CHECK-NEXT:    [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L3:%.*]] ]
+; CHECK-NEXT:    [[INC1]] = add nuw i32 [[IV1]], 1
+; CHECK-NEXT:    [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADR]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[IV1]], 5
+; CHECK-NEXT:    br i1 [[CMP1]], label [[L2:%.*]], label [[EXIT1:%.*]]
+; CHECK:       l2:
+; CHECK-NEXT:    br i1 true, label [[L3]], label [[EXIT2:%.*]]
+; CHECK:       l3:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0
+; CHECK-NEXT:    br i1 [[CMP3]], label [[L1]], label [[EXIT3:%.*]]
+; CHECK:       exit1:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       exit2:
+; CHECK-NEXT:    ret i32 2
+; CHECK:       exit3:
+; CHECK-NEXT:    ret i32 3
+;
 entry:
   br label %l1
 l1:
@@ -131,11 +206,19 @@ exit3:
 
 ; When loop unroll merges a loop exit with one of its parent loop's
 ; exits, SCEV must forget its ExitNotTaken info.
-;
-; CHECK-LABEL: @nestedUnroll(
-; CHECK-NOT: br i1
-; CHECK: for.body87:
 define void @nestedUnroll() nounwind {
+; CHECK-LABEL: @nestedUnroll(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    br label [[FOR_BODY38:%.*]]
+; CHECK:       for.body38:
+; CHECK-NEXT:    br label [[FOR_BODY43:%.*]]
+; CHECK:       for.body43:
+; CHECK-NEXT:    br label [[FOR_BODY87:%.*]]
+; CHECK:       for.body87:
+; CHECK-NEXT:    br label [[FOR_BODY87]]
+;
 entry:
   br label %for.inc
 
@@ -177,13 +260,23 @@ for.body87:
 ; iteration via the early exit. So loop unrolling cannot assume that
 ; the loop latch's exit count of zero is an upper bound on the number
 ; of iterations.
-;
-; CHECK-LABEL: @nsw_latch(
-; CHECK: for.body:
-; CHECK: %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ]
-; CHECK: return:
-; CHECK: %b.03.lcssa = phi i32 [ 8, %for.body ], [ 0, %for.cond ]
 define void @nsw_latch(i32* %a) nounwind {
+; CHECK-LABEL: @nsw_latch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[B_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_COND:%.*]] ]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[B_03]], 0
+; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[B_03]], 8
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_COND]], label [[RETURN:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    br i1 false, label [[RETURN]], label [[FOR_BODY]]
+; CHECK:       return:
+; CHECK-NEXT:    [[B_03_LCSSA:%.*]] = phi i32 [ 8, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]
+; CHECK-NEXT:    store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %for.body
 

From d91f864ced6eebe8db35c092c8da29e87cd882b2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sat, 17 Apr 2021 18:16:07 +0100
Subject: [PATCH 46/52] [ADT] Update RPOT to work with specializations of
 different types.

At the moment, ReversePostOrderTraversal performs a post-order walk on
the entry node of the passed in graph, rather than the graph type
itself.

If GT::NodeRef is the same as GraphT, everything works as expected and
this is the case for the current uses in-tree. But it does not work as
expected if GraphT != GT::NodeRef. In that case, we either fail to build
(if there is no GraphTraits specialization for GT::NodeRef) or we pick the
GraphTraits specialization for GT::NodeRef, instead of the specialization
of GraphT.

Both the depth-first and post-order iterators pick the expected
specialization and this patch updates ReversePostOrderTraversal to
delegate to po_begin & po_end to pick the right specialization, rather
than forcing the use of GraphTraits<GT::NodeRef> by first getting the
entry node.

This makes `ReversePostOrderTraversal<Graph<6>> RPOT(G);` build and
work as expected in the test.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D100169
---
 llvm/include/llvm/ADT/PostOrderIterator.h    |  6 ++--
 llvm/unittests/ADT/PostOrderIteratorTest.cpp | 38 ++++++++++++++++++++
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h
index 3ab76d7cf740..74314d39d825 100644
--- a/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -292,15 +292,15 @@ class ReversePostOrderTraversal {
 
   std::vector<NodeRef> Blocks; // Block list in normal PO order
 
-  void Initialize(NodeRef BB) {
-    std::copy(po_begin(BB), po_end(BB), std::back_inserter(Blocks));
+  void Initialize(const GraphT &G) {
+    std::copy(po_begin(G), po_end(G), std::back_inserter(Blocks));
   }
 
 public:
   using rpo_iterator = typename std::vector<NodeRef>::reverse_iterator;
   using const_rpo_iterator = typename std::vector<NodeRef>::const_reverse_iterator;
 
-  ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); }
+  ReversePostOrderTraversal(const GraphT &G) { Initialize(G); }
 
   // Because we want a reverse post order, use reverse iterators from the vector
   rpo_iterator begin() { return Blocks.rbegin(); }
diff --git a/llvm/unittests/ADT/PostOrderIteratorTest.cpp b/llvm/unittests/ADT/PostOrderIteratorTest.cpp
index 8e53247fc2f6..e9ab251f4229 100644
--- a/llvm/unittests/ADT/PostOrderIteratorTest.cpp
+++ b/llvm/unittests/ADT/PostOrderIteratorTest.cpp
@@ -9,6 +9,8 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "gtest/gtest.h"
+#include "TestGraph.h"
+
 using namespace llvm;
 
 namespace {
@@ -33,4 +35,40 @@ TEST(PostOrderIteratorTest, Compiles) {
   auto PIExt = po_ext_end(NullBB, Ext);
   PIExt.insertEdge(Optional<BasicBlock *>(), NullBB);
 }
+
+// Test post-order and reverse post-order traversals for simple graph type.
+TEST(PostOrderIteratorTest, PostOrderAndReversePostOrderTraverrsal) {
+  Graph<6> G;
+  G.AddEdge(0, 1);
+  G.AddEdge(0, 2);
+  G.AddEdge(0, 3);
+  G.AddEdge(1, 4);
+  G.AddEdge(2, 5);
+  G.AddEdge(5, 2);
+  G.AddEdge(2, 4);
+  G.AddEdge(1, 4);
+
+  SmallVector<int> FromIterator;
+  for (auto N : post_order(G))
+    FromIterator.push_back(N->first);
+  EXPECT_EQ(6u, FromIterator.size());
+  EXPECT_EQ(4, FromIterator[0]);
+  EXPECT_EQ(1, FromIterator[1]);
+  EXPECT_EQ(5, FromIterator[2]);
+  EXPECT_EQ(2, FromIterator[3]);
+  EXPECT_EQ(3, FromIterator[4]);
+  EXPECT_EQ(0, FromIterator[5]);
+  FromIterator.clear();
+
+  ReversePostOrderTraversal<Graph<6>> RPOT(G);
+  for (auto N : RPOT)
+    FromIterator.push_back(N->first);
+  EXPECT_EQ(6u, FromIterator.size());
+  EXPECT_EQ(0, FromIterator[0]);
+  EXPECT_EQ(3, FromIterator[1]);
+  EXPECT_EQ(2, FromIterator[2]);
+  EXPECT_EQ(5, FromIterator[3]);
+  EXPECT_EQ(1, FromIterator[4]);
+  EXPECT_EQ(4, FromIterator[5]);
+}
 }

From 863d5c4e4dcf1ffd3093438a5fa61c3cd1dec704 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 17 Apr 2021 17:03:20 -0400
Subject: [PATCH 47/52] [libc++] Remove hard tabs, U+00AD, and U+200B from all
 libc++ headers. NFCI.

---
 libcxx/include/atomic      |  2 +-
 libcxx/include/barrier     |  4 +--
 libcxx/include/cmath       |  4 +--
 libcxx/include/iterator    |  2 +-
 libcxx/include/numeric     |  4 +--
 libcxx/include/type_traits | 50 +++++++++++++++++++-------------------
 6 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/libcxx/include/atomic b/libcxx/include/atomic
index f95385507413..3423642d1b60 100644
--- a/libcxx/include/atomic
+++ b/libcxx/include/atomic
@@ -669,7 +669,7 @@ static_assert((is_same<underlying_type<memory_order>::type, __memory_order_under
   "unexpected underlying type for std::memory_order");
 
 #if defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) || \
-	defined(_LIBCPP_ATOMIC_ONLY_USE_BUILTINS)
+    defined(_LIBCPP_ATOMIC_ONLY_USE_BUILTINS)
 
 // [atomics.types.generic]p1 guarantees _Tp is trivially copyable. Because
 // the default operator= in an object is not volatile, a byte-by-byte copy
diff --git a/libcxx/include/barrier b/libcxx/include/barrier
index be213a6895ef..e650773d430f 100644
--- a/libcxx/include/barrier
+++ b/libcxx/include/barrier
@@ -309,11 +309,11 @@ public:
     {
         __b.wait(_VSTD::move(__phase));
     }
-	_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
+    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
     void arrive_and_wait()
     {
         wait(arrive());
-	}
+    }
     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
     void arrive_and_drop()
     {
diff --git a/libcxx/include/cmath b/libcxx/include/cmath
index 9c493ce38a53..1efd4e47c22c 100644
--- a/libcxx/include/cmath
+++ b/libcxx/include/cmath
@@ -622,9 +622,9 @@ _Fp __lerp(_Fp __a, _Fp __b, _Fp __t) noexcept {
     if (__t == 1) return __b;
     const _Fp __x = __a + __t * (__b - __a);
     if (__t > 1 == __b > __a)
-    	return __b < __x ? __x : __b;
+        return __b < __x ? __x : __b;
     else
-    	return __x < __b ? __x : __b;
+        return __x < __b ? __x : __b;
 }
 
 constexpr float
diff --git a/libcxx/include/iterator b/libcxx/include/iterator
index 5e9e8229ed1d..c94d275ae276 100644
--- a/libcxx/include/iterator
+++ b/libcxx/include/iterator
@@ -406,7 +406,7 @@ template <class C> constexpr auto size(const C& c) -> decltype(c.size());
 template <class T, size_t N> constexpr size_t size(const T (&array)[N]) noexcept; // C++17
 
 template <class C> constexpr auto ssize(const C& c)
-    -> common_type_t<ptrdiff_t, make_signed_t<decltype(c.size())>>;				       // C++20
+    -> common_type_t<ptrdiff_t, make_signed_t<decltype(c.size())>>;                    // C++20
 template <class T, ptrdiff_t> constexpr ptrdiff_t ssize(const T (&array)[N]) noexcept; // C++20
 
 template <class C> constexpr auto empty(const C& c) -> decltype(c.empty());       // C++17
diff --git a/libcxx/include/numeric b/libcxx/include/numeric
index 4f202bb84f70..862396d5e1e4 100644
--- a/libcxx/include/numeric
+++ b/libcxx/include/numeric
@@ -613,8 +613,8 @@ _LIBCPP_INLINE_VISIBILITY constexpr
 enable_if_t<is_floating_point_v<_Fp>, _Fp>
 midpoint(_Fp __a, _Fp __b) noexcept
 {
-	constexpr _Fp __lo = numeric_limits<_Fp>::min()*2;
-	constexpr _Fp __hi = numeric_limits<_Fp>::max()/2;
+    constexpr _Fp __lo = numeric_limits<_Fp>::min()*2;
+    constexpr _Fp __hi = numeric_limits<_Fp>::max()/2;
     return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi ?  // typical case: overflow is impossible
       (__a + __b)/2 :                                        // always correctly rounded
       __fp_abs(__a) < __lo ? __a + __b/2 :                   // not safe to halve a
diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits
index 43a04cbf049a..38cad3312bcf 100644
--- a/libcxx/include/type_traits
+++ b/libcxx/include/type_traits
@@ -2424,7 +2424,7 @@ template <class ..._Tp> using common_type_t = typename common_type<_Tp...>::type
 #endif
 
 #if _LIBCPP_STD_VER > 11
-// Let COPYCV(FROM, TO) be an alias for type TO with the addition of FROM’s
+// Let COPYCV(FROM, TO) be an alias for type TO with the addition of FROM's
 // top-level cv-qualifiers.
 template <class _From, class _To>
 struct __copy_cv
@@ -2493,8 +2493,8 @@ struct __xref {
   using __apply = __copy_cvref_t<_Tp, _Up>;
 };
 
-// Given types `A` and `B`, let `X` be `remove_­reference_­t<A>`, let `Y` be `remove_­reference_­t<B>`,
-// and let `COMMON-​REF(A, B)` be:
+// Given types A and B, let X be remove_reference_t<A>, let Y be remove_reference_t<B>,
+// and let COMMON-REF(A, B) be:
 template<class _Ap, class _Bp, class _Xp = remove_reference_t<_Ap>, class _Yp = remove_reference_t<_Bp>>
 struct __common_ref;
 
@@ -2505,8 +2505,8 @@ template<class _Xp, class _Yp>
 using __cv_cond_res = __cond_res<__copy_cv_t<_Xp, _Yp>&, __copy_cv_t<_Yp, _Xp>&>;
 
 
-//    If `A` and `B` are both lvalue reference types, `COMMON-REF(A, B)` is
-//    `COND-RES(COPYCV(X, Y) &, COPYCV(​Y, X) &)` if that type exists and is a reference type.
+//    If A and B are both lvalue reference types, COMMON-REF(A, B) is
+//    COND-RES(COPYCV(X, Y)&, COPYCV(Y, X)&) if that type exists and is a reference type.
 template<class _Ap, class _Bp, class _Xp, class _Yp>
 requires requires { typename __cv_cond_res<_Xp, _Yp>; } && is_reference_v<__cv_cond_res<_Xp, _Yp>>
 struct __common_ref<_Ap&, _Bp&, _Xp, _Yp>
@@ -2514,13 +2514,13 @@ struct __common_ref<_Ap&, _Bp&, _Xp, _Yp>
     using __type = __cv_cond_res<_Xp, _Yp>;
 };
 
-//    Otherwise, let `C` be `remove_­reference_­t<COMMON-REF(X&, Y&)>&&`....
+//    Otherwise, let C be remove_reference_t<COMMON-REF(X&, Y&)>&&. ...
 template <class _Xp, class _Yp>
 using __common_ref_C = remove_reference_t<__common_ref_t<_Xp&, _Yp&>>&&;
 
 
-//    .... If `A` and `B` are both rvalue reference types, `C` is well-formed, and
-//    `is_­convertible_­v<A, C> && is_­convertible_­v<B, C>` is `true`, then `COMMON-REF(A, B)` is `C`.
+//    ... If A and B are both rvalue reference types, C is well-formed, and
+//    is_convertible_v<A, C> && is_convertible_v<B, C> is true, then COMMON-REF(A, B) is C.
 template<class _Ap, class _Bp, class _Xp, class _Yp>
 requires
   requires { typename __common_ref_C<_Xp, _Yp>; } &&
@@ -2531,12 +2531,12 @@ struct __common_ref<_Ap&&, _Bp&&, _Xp, _Yp>
     using __type = __common_ref_C<_Xp, _Yp>;
 };
 
-//    Otherwise, let `D` be `COMMON-REF(const X&, Y&)`....
+//    Otherwise, let D be COMMON-REF(const X&, Y&). ...
 template <class _Tp, class _Up>
 using __common_ref_D = __common_ref_t<const _Tp&, _Up&>;
 
-//    ... If `A` is an rvalue reference and `B` is an lvalue reference and `D` is well-formed and
-//    `is_­convertible_­v<A, D>` is `true`, then `COMMON-REF(A, B)` is `D`.
+//    ... If A is an rvalue reference and B is an lvalue reference and D is well-formed and
+//    is_convertible_v<A, D> is true, then COMMON-REF(A, B) is D.
 template<class _Ap, class _Bp, class _Xp, class _Yp>
 requires requires { typename __common_ref_D<_Xp, _Yp>; } &&
          is_convertible_v<_Ap&&, __common_ref_D<_Xp, _Yp>>
@@ -2545,12 +2545,12 @@ struct __common_ref<_Ap&&, _Bp&, _Xp, _Yp>
     using __type = __common_ref_D<_Xp, _Yp>;
 };
 
-//    Otherwise, if `A` is an lvalue reference and `B` is an rvalue reference, then
-//    `COMMON-REF(A, B)` is `COMMON-REF(B, A)`.
+//    Otherwise, if A is an lvalue reference and B is an rvalue reference, then
+//    COMMON-REF(A, B) is COMMON-REF(B, A).
 template<class _Ap, class _Bp, class _Xp, class _Yp>
 struct __common_ref<_Ap&, _Bp&&, _Xp, _Yp> : __common_ref<_Bp&&, _Ap&> {};
 
-//    Otherwise, `COMMON-REF(A, B)` is ill-formed.
+//    Otherwise, COMMON-REF(A, B) is ill-formed.
 template<class _Ap, class _Bp, class _Xp, class _Yp>
 struct __common_ref {};
 
@@ -2578,8 +2578,8 @@ template <class _Tp, class _Up> struct __common_reference_sub_bullet3;
 template <class _Tp, class _Up> struct __common_reference_sub_bullet2 : __common_reference_sub_bullet3<_Tp, _Up> {};
 template <class _Tp, class _Up> struct __common_reference_sub_bullet1 : __common_reference_sub_bullet2<_Tp, _Up> {};
 
-// sub-bullet 1 - If `T1` and `T2` are reference types and `COMMON-REF(T1, T2)` is well-formed, then
-// the member typedef type denotes that type.
+// sub-bullet 1 - If T1 and T2 are reference types and COMMON-REF(T1, T2) is well-formed, then
+// the member typedef `type` denotes that type.
 template <class _Tp, class _Up> struct common_reference<_Tp, _Up> : __common_reference_sub_bullet1<_Tp, _Up> {};
 
 template <class _Tp, class _Up>
@@ -2589,8 +2589,8 @@ struct __common_reference_sub_bullet1<_Tp, _Up>
     using type = __common_ref_t<_Tp, _Up>;
 };
 
-// sub-bullet 2 - Otherwise, if `basic_­common_­reference<remove_­cvref_­t<T1>, remove_­cvref_­t<T2>, ​XREF(​T1), XREF(T2)>​::​type`
-// is well-formed, then the member typedef type denotes that type.
+// sub-bullet 2 - Otherwise, if basic_common_reference<remove_cvref_t<T1>, remove_cvref_t<T2>, XREF(T1), XREF(T2)>::type
+// is well-formed, then the member typedef `type` denotes that type.
 template <class, class, template <class> class, template <class> class> struct basic_common_reference {};
 
 template <class _Tp, class _Up>
@@ -2605,8 +2605,8 @@ struct __common_reference_sub_bullet2<_Tp, _Up>
     using type = __basic_common_reference_t<_Tp, _Up>;
 };
 
-// sub-bullet 3 - Otherwise, if `COND-RES(T1, T2)` is well-formed, then the member typedef type
-// denotes that type.
+// sub-bullet 3 - Otherwise, if COND-RES(T1, T2) is well-formed,
+// then the member typedef `type` denotes that type.
 template <class _Tp, class _Up>
 requires requires { typename __cond_res<_Tp, _Up>; }
 struct __common_reference_sub_bullet3<_Tp, _Up>
@@ -2615,20 +2615,20 @@ struct __common_reference_sub_bullet3<_Tp, _Up>
 };
 
 
-// sub-bullet 4 & 5 - Otherwise, if `common_­type_­t<T1, T2>` is well-formed, then the member typedef
-//                    type denotes that type.
-//                  - Otherwise, there shall be no member type.
+// sub-bullet 4 & 5 - Otherwise, if common_type_t<T1, T2> is well-formed,
+//                    then the member typedef `type` denotes that type.
+//                  - Otherwise, there shall be no member `type`.
 template <class _Tp, class _Up> struct __common_reference_sub_bullet3 : common_type<_Tp, _Up> {};
 
 // bullet 4 - If there is such a type `C`, the member typedef type shall denote the same type, if
-//            any, as `common_­reference_­t<C, Rest...>`.
+//            any, as `common_reference_t<C, Rest...>`.
 template <class _Tp, class _Up, class _Vp, class... _Rest>
 requires requires { typename common_reference_t<_Tp, _Up>; }
 struct common_reference<_Tp, _Up, _Vp, _Rest...>
     : common_reference<common_reference_t<_Tp, _Up>, _Vp, _Rest...>
 {};
 
-// bullet 5 - Otherwise, there shall be no member type.
+// bullet 5 - Otherwise, there shall be no member `type`.
 template <class...> struct common_reference {};
 
 #endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)

From dd68942f1d79986267a58c9a9924522680d5c82b Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Wed, 24 Mar 2021 02:29:30 +0000
Subject: [PATCH 48/52] [AST] Add TypeLoc support to node introspection

Extend the matchers gathering API for types to record template
parameters.  The TypeLoc type hierarchy has some types which are
templates used in CRTP such as PointerLikeTypeLoc.  Record the inherited
template and template arguments of types inheriting those CRTP types in
the ClassInheritance map.  Because the name inherited from is now
computed, the value type in that map changes from StringRef to
std::string.  This also causes the toJSON override signature used to
serialize that map to change.

Remove the logic for skipping over empty ClassData instances.  Several
classes such as TypeOfExprTypeLoc inherit a CRTP class which provides
interesting locations though the derived class does not.  Record it as a
class to make the locations it inherits available.

Record the typeSourceInfo accessors too as they provide access to
TypeLocs in many classes.

The existing unit tests use UnorderedElementsAre to compare the
introspection result with the expected result.  Our current
implementation of google mock (in gmock-generated-matchers.h) only supports
comparing a container of up to 10 elements.  As we are now
returning more than 10 results for one of the introspection tests,
change it to instead compare against an ordered vector of pairs.

Because a macro is used to generate API strings and API calls, disable
clang-format in blocks of expected results.  Otherwise clang-format
would insert whitespaces which would then be compared against the
introspected strings and fail the test.

Introduce a recursion guard in the generated code.  The TypeLoc class
has an IgnoreParens() API which by default returns itself, so it would
otherwise recurse infinitely.

Differential Revision: https://reviews.llvm.org/D100516
---
 .../include/clang/Tooling/NodeIntrospection.h |   1 +
 clang/lib/Tooling/CMakeLists.txt              |   4 +
 clang/lib/Tooling/DumpTool/APIData.h          |   8 +-
 .../Tooling/DumpTool/ASTSrcLocProcessor.cpp   | 137 +++--
 .../lib/Tooling/DumpTool/ASTSrcLocProcessor.h |   6 +-
 .../Tooling/DumpTool/generate_cxx_src_locs.py | 247 +++++++-
 .../Introspection/IntrospectionTest.cpp       | 577 +++++++++++++++---
 7 files changed, 821 insertions(+), 159 deletions(-)

diff --git a/clang/include/clang/Tooling/NodeIntrospection.h b/clang/include/clang/Tooling/NodeIntrospection.h
index c8518ea63546..dd7ffe399120 100644
--- a/clang/include/clang/Tooling/NodeIntrospection.h
+++ b/clang/include/clang/Tooling/NodeIntrospection.h
@@ -86,6 +86,7 @@ NodeLocationAccessors GetLocations(clang::CXXCtorInitializer const *Object);
 NodeLocationAccessors GetLocations(clang::NestedNameSpecifierLoc const *);
 NodeLocationAccessors GetLocations(clang::TemplateArgumentLoc const *);
 NodeLocationAccessors GetLocations(clang::CXXBaseSpecifier const *);
+NodeLocationAccessors GetLocations(clang::TypeLoc const &);
 NodeLocationAccessors GetLocations(clang::DynTypedNode const &Node);
 } // namespace NodeIntrospection
 } // namespace tooling
diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt
index e90b681e16f4..dfb732371dfb 100644
--- a/clang/lib/Tooling/CMakeLists.txt
+++ b/clang/lib/Tooling/CMakeLists.txt
@@ -58,6 +58,10 @@ NodeLocationAccessors NodeIntrospection::GetLocations(
     clang::CXXBaseSpecifier const*) {
   return {};
 }
+NodeLocationAccessors NodeIntrospection::GetLocations(
+    clang::TypeLoc const&) {
+  return {};
+}
 NodeLocationAccessors
 NodeIntrospection::GetLocations(clang::DynTypedNode const &) {
   return {};
diff --git a/clang/lib/Tooling/DumpTool/APIData.h b/clang/lib/Tooling/DumpTool/APIData.h
index 0ec53f6e7dc3..6ebf017b5c8f 100644
--- a/clang/lib/Tooling/DumpTool/APIData.h
+++ b/clang/lib/Tooling/DumpTool/APIData.h
@@ -16,13 +16,11 @@ namespace clang {
 namespace tooling {
 
 struct ClassData {
-
-  bool isEmpty() const {
-    return ASTClassLocations.empty() && ASTClassRanges.empty();
-  }
-
   std::vector<std::string> ASTClassLocations;
   std::vector<std::string> ASTClassRanges;
+  std::vector<std::string> TemplateParms;
+  std::vector<std::string> TypeSourceInfos;
+  std::vector<std::string> TypeLocs;
   // TODO: Extend this with locations available via typelocs etc.
 };
 
diff --git a/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.cpp b/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.cpp
index a19114a06064..497cd3bdce2c 100644
--- a/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.cpp
+++ b/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.cpp
@@ -22,18 +22,24 @@ ASTSrcLocProcessor::ASTSrcLocProcessor(StringRef JsonPath)
 
   Finder = std::make_unique<MatchFinder>(std::move(FinderOptions));
   Finder->addMatcher(
-      cxxRecordDecl(
-          isDefinition(),
-          isSameOrDerivedFrom(
-              // TODO: Extend this with other clades
-              namedDecl(hasAnyName("clang::Stmt", "clang::Decl",
-                                   "clang::CXXCtorInitializer",
-                                   "clang::NestedNameSpecifierLoc",
-                                   "clang::TemplateArgumentLoc",
-                                   "clang::CXXBaseSpecifier"))
-                  .bind("nodeClade")),
-          optionally(isDerivedFrom(cxxRecordDecl().bind("derivedFrom"))))
-          .bind("className"),
+          cxxRecordDecl(
+              isDefinition(),
+              isSameOrDerivedFrom(
+                  // TODO: Extend this with other clades
+                  namedDecl(hasAnyName("clang::Stmt", "clang::Decl",
+                                       "clang::CXXCtorInitializer",
+                                       "clang::NestedNameSpecifierLoc",
+                                       "clang::TemplateArgumentLoc",
+                                       "clang::CXXBaseSpecifier",
+                                       "clang::TypeLoc"))
+                      .bind("nodeClade")),
+              optionally(isDerivedFrom(cxxRecordDecl().bind("derivedFrom"))))
+              .bind("className"),
+      this);
+  Finder->addMatcher(
+          cxxRecordDecl(isDefinition(), hasAnyName("clang::PointerLikeTypeLoc",
+                                                   "clang::TypeofLikeTypeLoc"))
+              .bind("templateName"),
       this);
 }
 
@@ -53,7 +59,7 @@ llvm::json::Object toJSON(llvm::StringMap<std::vector<StringRef>> const &Obj) {
   return JsonObj;
 }
 
-llvm::json::Object toJSON(llvm::StringMap<StringRef> const &Obj) {
+llvm::json::Object toJSON(llvm::StringMap<std::string> const &Obj) {
   using llvm::json::toJSON;
 
   llvm::json::Object JsonObj;
@@ -70,6 +76,12 @@ llvm::json::Object toJSON(ClassData const &Obj) {
     JsonObj["sourceLocations"] = Obj.ASTClassLocations;
   if (!Obj.ASTClassRanges.empty())
     JsonObj["sourceRanges"] = Obj.ASTClassRanges;
+  if (!Obj.TemplateParms.empty())
+    JsonObj["templateParms"] = Obj.TemplateParms;
+  if (!Obj.TypeSourceInfos.empty())
+    JsonObj["typeSourceInfos"] = Obj.TypeSourceInfos;
+  if (!Obj.TypeLocs.empty())
+    JsonObj["typeLocs"] = Obj.TypeLocs;
   return JsonObj;
 }
 
@@ -77,10 +89,8 @@ llvm::json::Object toJSON(llvm::StringMap<ClassData> const &Obj) {
   using llvm::json::toJSON;
 
   llvm::json::Object JsonObj;
-  for (const auto &Item : Obj) {
-    if (!Item.second.isEmpty())
-      JsonObj[Item.first()] = ::toJSON(Item.second);
-  }
+  for (const auto &Item : Obj)
+    JsonObj[Item.first()] = ::toJSON(Item.second);
   return JsonObj;
 }
 
@@ -127,28 +137,40 @@ CaptureMethods(std::string TypeString, const clang::CXXRecordDecl *ASTClass,
                   equalsNode(ASTClass),
                   optionally(isDerivedFrom(
                       cxxRecordDecl(hasAnyName("clang::Stmt", "clang::Decl"))
-                          .bind("stmtOrDeclBase"))))),
+                          .bind("stmtOrDeclBase"))),
+                  optionally(isDerivedFrom(
+                      cxxRecordDecl(hasName("clang::Expr")).bind("exprBase"))),
+                  optionally(
+                      isDerivedFrom(cxxRecordDecl(hasName("clang::TypeLoc"))
+                                        .bind("typeLocBase"))))),
               returns(asString(TypeString)))
               .bind("classMethod")),
       *ASTClass, *Result.Context);
 
   std::vector<std::string> Methods;
   for (const auto &BN : BoundNodesVec) {
-    const auto *StmtOrDeclBase =
-        BN.getNodeAs<clang::CXXRecordDecl>("stmtOrDeclBase");
     if (const auto *Node = BN.getNodeAs<clang::NamedDecl>("classMethod")) {
-      // Only record the getBeginLoc etc on Stmt etc, because it will call
-      // more-derived implementations pseudo-virtually.
+      const auto *StmtOrDeclBase =
+          BN.getNodeAs<clang::CXXRecordDecl>("stmtOrDeclBase");
+      const auto *TypeLocBase =
+          BN.getNodeAs<clang::CXXRecordDecl>("typeLocBase");
+      const auto *ExprBase = BN.getNodeAs<clang::CXXRecordDecl>("exprBase");
+      // The clang AST has several methods on base classes which are overridden
+      // pseudo-virtually by derived classes.
+      // We record only the pseudo-virtual methods on the base classes to
+      // avoid duplication.
       if (StmtOrDeclBase &&
           (Node->getName() == "getBeginLoc" || Node->getName() == "getEndLoc" ||
            Node->getName() == "getSourceRange"))
         continue;
-
-      // Only record the getExprLoc on Expr, because it will call
-      // more-derived implementations pseudo-virtually.
-      if (ASTClass->getName() != "Expr" && Node->getName() == "getExprLoc") {
+      if (ExprBase && Node->getName() == "getExprLoc")
+        continue;
+      if (TypeLocBase && Node->getName() == "getLocalSourceRange")
+        continue;
+      if ((ASTClass->getName() == "PointerLikeTypeLoc" ||
+           ASTClass->getName() == "TypeofLikeTypeLoc") &&
+          Node->getName() == "getLocalSourceRange")
         continue;
-      }
       Methods.push_back(Node->getName().str());
     }
   }
@@ -160,25 +182,64 @@ void ASTSrcLocProcessor::run(const MatchFinder::MatchResult &Result) {
   const auto *ASTClass =
       Result.Nodes.getNodeAs<clang::CXXRecordDecl>("className");
 
+  StringRef CladeName;
+  if (ASTClass) {
+    if (const auto *NodeClade =
+            Result.Nodes.getNodeAs<clang::CXXRecordDecl>("nodeClade"))
+      CladeName = NodeClade->getName();
+  } else {
+    ASTClass = Result.Nodes.getNodeAs<clang::CXXRecordDecl>("templateName");
+    CladeName = "TypeLoc";
+  }
+
   StringRef ClassName = ASTClass->getName();
 
   ClassData CD;
 
-  const auto *NodeClade =
-      Result.Nodes.getNodeAs<clang::CXXRecordDecl>("nodeClade");
-  StringRef CladeName = NodeClade->getName();
-
-  if (const auto *DerivedFrom =
-          Result.Nodes.getNodeAs<clang::CXXRecordDecl>("derivedFrom"))
-    ClassInheritance[ClassName] = DerivedFrom->getName();
-
   CD.ASTClassLocations =
       CaptureMethods("class clang::SourceLocation", ASTClass, Result);
   CD.ASTClassRanges =
       CaptureMethods("class clang::SourceRange", ASTClass, Result);
+  CD.TypeSourceInfos =
+      CaptureMethods("class clang::TypeSourceInfo *", ASTClass, Result);
+  CD.TypeLocs = CaptureMethods("class clang::TypeLoc", ASTClass, Result);
 
-  if (!CD.isEmpty()) {
-    ClassEntries[ClassName] = CD;
-    ClassesInClade[CladeName].push_back(ClassName);
+  if (const auto *DerivedFrom =
+          Result.Nodes.getNodeAs<clang::CXXRecordDecl>("derivedFrom")) {
+
+    if (const auto *Templ =
+            llvm::dyn_cast<clang::ClassTemplateSpecializationDecl>(
+                DerivedFrom)) {
+
+      const auto &TArgs = Templ->getTemplateArgs();
+
+      std::string TArgsString = (DerivedFrom->getName() + "<").str();
+
+      for (unsigned I = 0; I < TArgs.size(); ++I) {
+        if (I > 0) {
+          TArgsString += ", ";
+        }
+        auto Ty = TArgs.get(I).getAsType();
+        clang::PrintingPolicy PPol(Result.Context->getLangOpts());
+        PPol.TerseOutput = true;
+        TArgsString += Ty.getAsString(PPol);
+      }
+      TArgsString += ">";
+
+      ClassInheritance[ClassName] = std::move(TArgsString);
+    } else {
+      ClassInheritance[ClassName] = DerivedFrom->getName().str();
+    }
+  }
+
+  if (const auto *Templ = ASTClass->getDescribedClassTemplate()) {
+    if (auto *TParams = Templ->getTemplateParameters()) {
+      for (const auto &TParam : *TParams) {
+        CD.TemplateParms.push_back(TParam->getName().str());
+      }
+    }
   }
+
+  ClassEntries[ClassName] = CD;
+  ClassesInClade[CladeName].push_back(ClassName);
 }
diff --git a/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.h b/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.h
index 5d848f48ed54..05c4f92676e8 100644
--- a/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.h
+++ b/clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.h
@@ -35,7 +35,11 @@ class ASTSrcLocProcessor : public ast_matchers::MatchFinder::MatchCallback {
 private:
   void run(const ast_matchers::MatchFinder::MatchResult &Result) override;
 
-  llvm::StringMap<StringRef> ClassInheritance;
+  llvm::Optional<TraversalKind> getCheckTraversalKind() const override {
+    return TK_IgnoreUnlessSpelledInSource;
+  }
+
+  llvm::StringMap<std::string> ClassInheritance;
   llvm::StringMap<std::vector<StringRef>> ClassesInClade;
   llvm::StringMap<ClassData> ClassEntries;
 
diff --git a/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py b/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
index b0953df19203..3664f521e27b 100755
--- a/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
+++ b/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
@@ -11,6 +11,9 @@ class Generator(object):
 
     implementationContent = ''
 
+    def __init__(self, templateClasses):
+        self.templateClasses = templateClasses
+
     def GeneratePrologue(self):
 
         self.implementationContent += \
@@ -30,25 +33,69 @@ def GeneratePrologue(self):
 using RangeAndString = SourceRangeMap::value_type;
 
 bool NodeIntrospection::hasIntrospectionSupport() { return true; }
+
+struct RecursionPopper
+{
+    RecursionPopper(std::vector<clang::TypeLoc> &TypeLocRecursionGuard)
+    :  TLRG(TypeLocRecursionGuard)
+    {
+
+    }
+
+    ~RecursionPopper()
+    {
+    TLRG.pop_back();
+    }
+
+private:
+std::vector<clang::TypeLoc> &TLRG;
+};
 """
 
     def GenerateBaseGetLocationsDeclaration(self, CladeName):
+        InstanceDecoration = "*"
+        if CladeName == "TypeLoc":
+            InstanceDecoration = "&"
+
         self.implementationContent += \
             """
 void GetLocationsImpl(SharedLocationCall const& Prefix,
-    clang::{0} const *Object, SourceLocationMap &Locs,
-    SourceRangeMap &Rngs);
-""".format(CladeName)
-
-    def GenerateSrcLocMethod(self, ClassName, ClassData):
+    clang::{0} const {1}Object, SourceLocationMap &Locs,
+    SourceRangeMap &Rngs,
+    std::vector<clang::TypeLoc> &TypeLocRecursionGuard);
+""".format(CladeName, InstanceDecoration)
+
+    def GenerateSrcLocMethod(self,
+            ClassName, ClassData, CreateLocalRecursionGuard):
+
+        NormalClassName = ClassName
+        RecursionGuardParam = ('' if CreateLocalRecursionGuard else \
+            ', std::vector<clang::TypeLoc>& TypeLocRecursionGuard')
+
+        if "templateParms" in ClassData:
+            TemplatePreamble = "template <typename "
+            ClassName += "<"
+            First = True
+            for TA in ClassData["templateParms"]:
+                if not First:
+                    ClassName += ", "
+                    TemplatePreamble += ", typename "
+
+                First = False
+                ClassName += TA
+                TemplatePreamble += TA
+
+            ClassName += ">"
+            TemplatePreamble += ">\n";
+            self.implementationContent += TemplatePreamble
 
         self.implementationContent += \
             """
 static void GetLocations{0}(SharedLocationCall const& Prefix,
-    clang::{0} const &Object,
-    SourceLocationMap &Locs, SourceRangeMap &Rngs)
+    clang::{1} const &Object,
+    SourceLocationMap &Locs, SourceRangeMap &Rngs {2})
 {{
-""".format(ClassName)
+""".format(NormalClassName, ClassName, RecursionGuardParam)
 
         if 'sourceLocations' in ClassData:
             for locName in ClassData['sourceLocations']:
@@ -70,6 +117,40 @@ def GenerateSrcLocMethod(self, ClassName, ClassData):
 
             self.implementationContent += '\n'
 
+        if 'typeLocs' in ClassData or 'typeSourceInfos' in ClassData:
+            if CreateLocalRecursionGuard:
+                self.implementationContent += \
+                    'std::vector<clang::TypeLoc> TypeLocRecursionGuard;\n'
+
+            self.implementationContent += '\n'
+
+            if 'typeLocs' in ClassData:
+                for typeLoc in ClassData['typeLocs']:
+
+                    self.implementationContent += \
+                        """
+              if (Object.{0}()) {{
+                GetLocationsImpl(
+                    llvm::makeIntrusiveRefCnt<LocationCall>(Prefix, "{0}"),
+                    Object.{0}(), Locs, Rngs, TypeLocRecursionGuard);
+                }}
+              """.format(typeLoc)
+
+            self.implementationContent += '\n'
+            if 'typeSourceInfos' in ClassData:
+                for tsi in ClassData['typeSourceInfos']:
+                    self.implementationContent += \
+                        """
+              if (Object.{0}()) {{
+                GetLocationsImpl(llvm::makeIntrusiveRefCnt<LocationCall>(
+                    llvm::makeIntrusiveRefCnt<LocationCall>(Prefix, "{0}",
+                        LocationCall::ReturnsPointer), "getTypeLoc"),
+                    Object.{0}()->getTypeLoc(), Locs, Rngs, TypeLocRecursionGuard);
+                    }}
+              """.format(tsi)
+
+                self.implementationContent += '\n'
+
         self.implementationContent += '}\n'
 
     def GenerateFiles(self, OutputFile):
@@ -77,32 +158,78 @@ def GenerateFiles(self, OutputFile):
                   OutputFile), 'w') as f:
             f.write(self.implementationContent)
 
-    def GenerateBaseGetLocationsFunction(self, ASTClassNames, CladeName):
+    def GenerateBaseGetLocationsFunction(self, ASTClassNames,
+            ClassEntries, CladeName, InheritanceMap,
+            CreateLocalRecursionGuard):
 
         MethodReturnType = 'NodeLocationAccessors'
+        InstanceDecoration = "*"
+        if CladeName == "TypeLoc":
+            InstanceDecoration = "&"
 
         Signature = \
-            'GetLocations(clang::{0} const *Object)'.format(CladeName)
+            'GetLocations(clang::{0} const {1}Object)'.format(
+                CladeName, InstanceDecoration)
         ImplSignature = \
             """
-GetLocationsImpl(SharedLocationCall const& Prefix,
-    clang::{0} const *Object, SourceLocationMap &Locs,
-    SourceRangeMap &Rngs)
-""".format(CladeName)
+    GetLocationsImpl(SharedLocationCall const& Prefix,
+        clang::{0} const {1}Object, SourceLocationMap &Locs,
+        SourceRangeMap &Rngs,
+        std::vector<clang::TypeLoc> &TypeLocRecursionGuard)
+    """.format(CladeName, InstanceDecoration)
+
+        self.implementationContent += 'void {0} {{ '.format(ImplSignature)
+
+        if CladeName == "TypeLoc":
+            self.implementationContent += 'if (Object.isNull()) return;'
+
+            self.implementationContent += \
+                """
+            if (llvm::find(TypeLocRecursionGuard, Object) != TypeLocRecursionGuard.end())
+              return;
+            TypeLocRecursionGuard.push_back(Object);
+            RecursionPopper RAII(TypeLocRecursionGuard);
+                """
 
+        RecursionGuardParam = ''
+        if not CreateLocalRecursionGuard:
+            RecursionGuardParam = ', TypeLocRecursionGuard'
+
+        ArgPrefix = '*'
+        if CladeName == "TypeLoc":
+            ArgPrefix = ''
         self.implementationContent += \
-            'void {0} {{ GetLocations{1}(Prefix, *Object, Locs, Rngs);'.format(
-                ImplSignature,
-                CladeName)
+            'GetLocations{0}(Prefix, {1}Object, Locs, Rngs {2});'.format(
+                CladeName, ArgPrefix, RecursionGuardParam)
+
+        if CladeName == "TypeLoc":
+            self.implementationContent += \
+                '''
+        if (auto QTL = Object.getAs<clang::QualifiedTypeLoc>()) {
+            auto Dequalified = QTL.getNextTypeLoc();
+            return GetLocationsImpl(llvm::makeIntrusiveRefCnt<LocationCall>(Prefix, "getNextTypeLoc"),
+                                Dequalified,
+                                Locs,
+                                Rngs,
+                                TypeLocRecursionGuard);
+        }'''
 
         for ASTClassName in ASTClassNames:
-            if ASTClassName != CladeName:
+            if ASTClassName in self.templateClasses:
+                continue
+            if ASTClassName == CladeName:
+                continue
+            if CladeName != "TypeLoc":
                 self.implementationContent += \
-                    """
+                """
 if (auto Derived = llvm::dyn_cast<clang::{0}>(Object)) {{
-  GetLocations{0}(Prefix, *Derived, Locs, Rngs);
+  GetLocations{0}(Prefix, *Derived, Locs, Rngs {1});
 }}
-""".format(ASTClassName)
+""".format(ASTClassName, RecursionGuardParam)
+                continue
+
+            self.GenerateBaseTypeLocVisit(ASTClassName, ClassEntries,
+                RecursionGuardParam, InheritanceMap)
 
         self.implementationContent += '}'
 
@@ -111,14 +238,43 @@ def GenerateBaseGetLocationsFunction(self, ASTClassNames, CladeName):
 {0} NodeIntrospection::{1} {{
   NodeLocationAccessors Result;
   SharedLocationCall Prefix;
+  std::vector<clang::TypeLoc> TypeLocRecursionGuard;
 
   GetLocationsImpl(Prefix, Object, Result.LocationAccessors,
-                   Result.RangeAccessors);
-""".format(MethodReturnType,
-                Signature)
+                   Result.RangeAccessors, TypeLocRecursionGuard);
+""".format(MethodReturnType, Signature)
 
         self.implementationContent += 'return Result; }'
 
+    def GenerateBaseTypeLocVisit(self, ASTClassName, ClassEntries,
+            RecursionGuardParam, InheritanceMap):
+        CallPrefix = 'Prefix'
+        if ASTClassName != 'TypeLoc':
+            CallPrefix = \
+                '''llvm::makeIntrusiveRefCnt<LocationCall>(Prefix,
+                    "getAs<clang::{0}>", LocationCall::IsCast)
+                '''.format(ASTClassName)
+
+        if ASTClassName in ClassEntries:
+
+            self.implementationContent += \
+            """
+            if (auto ConcreteTL = Object.getAs<clang::{0}>())
+              GetLocations{1}({2}, ConcreteTL, Locs, Rngs {3});
+            """.format(ASTClassName, ASTClassName,
+                       CallPrefix, RecursionGuardParam)
+
+        if ASTClassName in InheritanceMap:
+            for baseTemplate in self.templateClasses:
+                if baseTemplate in InheritanceMap[ASTClassName]:
+                    self.implementationContent += \
+                    """
+    if (auto ConcreteTL = Object.getAs<clang::{0}>())
+      GetLocations{1}({2}, ConcreteTL, Locs, Rngs {3});
+    """.format(InheritanceMap[ASTClassName], baseTemplate,
+            CallPrefix, RecursionGuardParam)
+
+
     def GenerateDynNodeVisitor(self, CladeNames):
         MethodReturnType = 'NodeLocationAccessors'
 
@@ -132,7 +288,13 @@ def GenerateDynNodeVisitor(self, CladeNames):
             self.implementationContent += \
                 """
     if (const auto *N = Node.get<{0}>())
-      return GetLocations(const_cast<{0} *>(N));""".format(CladeName)
+    """.format(CladeName)
+            ArgPrefix = ""
+            if CladeName == "TypeLoc":
+                ArgPrefix = "*"
+            self.implementationContent += \
+            """
+      return GetLocations({0}const_cast<{1} *>(N));""".format(ArgPrefix, CladeName)
 
         self.implementationContent += '\nreturn {}; }'
 
@@ -200,6 +362,10 @@ def main():
     clang::CXXBaseSpecifier const*) {
   return {};
 }
+NodeLocationAccessors NodeIntrospection::GetLocations(
+    clang::TypeLoc const&) {
+  return {};
+}
 NodeLocationAccessors
 NodeIntrospection::GetLocations(clang::DynTypedNode const &) {
   return {};
@@ -209,19 +375,42 @@ def main():
     """)
         sys.exit(0)
 
-    g = Generator()
+    templateClasses = []
+    for (ClassName, ClassAccessors) in jsonData['classEntries'].items():
+        if "templateParms" in ClassAccessors:
+            templateClasses.append(ClassName)
+
+    g = Generator(templateClasses)
 
     g.GeneratePrologue()
 
     for (CladeName, ClassNameData) in jsonData['classesInClade'].items():
         g.GenerateBaseGetLocationsDeclaration(CladeName)
 
+    def getCladeName(ClassName):  # Clade listing ClassName, or None.
+        memberships = jsonData['classesInClade'].items()
+        return next((Clade for (Clade, Members) in memberships
+                     if ClassName in Members), None)
+
     for (ClassName, ClassAccessors) in jsonData['classEntries'].items():
-        if ClassAccessors:
-            g.GenerateSrcLocMethod(ClassName, ClassAccessors)
+        cladeName = getCladeName(ClassName)
+        g.GenerateSrcLocMethod(
+            ClassName, ClassAccessors,
+            cladeName not in [
+                      'NestedNameSpecifierLoc',
+                      'TemplateArgumentLoc',
+                      'TypeLoc'])
 
     for (CladeName, ClassNameData) in jsonData['classesInClade'].items():
-        g.GenerateBaseGetLocationsFunction(ClassNameData, CladeName)
+        g.GenerateBaseGetLocationsFunction(
+            ClassNameData,
+            jsonData['classEntries'],
+            CladeName,
+            jsonData["classInheritance"],
+            CladeName not in [
+                      'NestedNameSpecifierLoc',
+                      'TemplateArgumentLoc',
+                      'TypeLoc'])
 
     g.GenerateDynNodeVisitor(jsonData['classesInClade'].keys())
 
diff --git a/clang/unittests/Introspection/IntrospectionTest.cpp b/clang/unittests/Introspection/IntrospectionTest.cpp
index e56963aa41a6..57431668a19f 100644
--- a/clang/unittests/Introspection/IntrospectionTest.cpp
+++ b/clang/unittests/Introspection/IntrospectionTest.cpp
@@ -26,25 +26,27 @@ using namespace clang::tooling;
 using ::testing::Pair;
 using ::testing::UnorderedElementsAre;
 
-template<typename T, typename MapType>
-std::map<std::string, T>
+template <typename T, typename MapType>
+std::vector<std::pair<std::string, T>>
 FormatExpected(const MapType &Accessors) {
-  std::map<std::string, T> Result;
+  std::vector<std::pair<std::string, T>> Result;
   llvm::transform(llvm::make_filter_range(Accessors,
                                           [](const auto &Accessor) {
                                             return Accessor.first.isValid();
                                           }),
-                  std::inserter(Result, Result.end()),
-                  [](const auto &Accessor) {
-                    return std::make_pair(LocationCallFormatterCpp::format(
-                                              *Accessor.second.get()),
-                                          Accessor.first);
+                  std::back_inserter(Result), [](const auto &Accessor) {
+                    return std::make_pair(
+                        LocationCallFormatterCpp::format(*Accessor.second),
+                        Accessor.first);
                   });
   return Result;
 }
 
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
+#define STRING_LOCATION_STDPAIR(INSTANCE, LOC)                                 \
+  std::make_pair(std::string(#LOC), (INSTANCE)->LOC) // (accessor text, value)
+
 /**
   A test formatter for a hypothetical language which needs
   neither casts nor '->'.
@@ -191,26 +193,85 @@ ns1::ns2::Foo<A, B> ns1::ns2::Bar<T, U>::Nested::method(int i, bool b) const
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
-  EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(
-                  STRING_LOCATION_PAIR(MethodDecl, getBeginLoc()),
-                  STRING_LOCATION_PAIR(MethodDecl, getBodyRBrace()),
-                  STRING_LOCATION_PAIR(MethodDecl, getInnerLocStart()),
-                  STRING_LOCATION_PAIR(MethodDecl, getLocation()),
-                  STRING_LOCATION_PAIR(MethodDecl, getOuterLocStart()),
-                  STRING_LOCATION_PAIR(MethodDecl, getTypeSpecEndLoc()),
-                  STRING_LOCATION_PAIR(MethodDecl, getTypeSpecStartLoc()),
-                  STRING_LOCATION_PAIR(MethodDecl, getEndLoc())));
+  llvm::sort(ExpectedLocations);
+
+  // clang-format off
+  EXPECT_EQ(
+      llvm::makeArrayRef(ExpectedLocations),
+      (ArrayRef<std::pair<std::string, SourceLocation>>{
+STRING_LOCATION_STDPAIR(MethodDecl, getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getBodyRBrace()),
+STRING_LOCATION_STDPAIR(MethodDecl, getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getInnerLocStart()),
+STRING_LOCATION_STDPAIR(MethodDecl, getLocation()),
+STRING_LOCATION_STDPAIR(MethodDecl, getOuterLocStart()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getLParenLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getLocalRangeBegin()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getLocalRangeEnd()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getRParenLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getLAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getRAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getTemplateNameLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getNextTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getLAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getNextTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getRAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getNextTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getTemplateNameLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getNextTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getNextTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getLAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getRAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getTemplateNameLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getLAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getRAngleLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getTemplateNameLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSpecEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSpecStartLoc())
+  }));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(
-      ExpectedRanges,
-      UnorderedElementsAre(
-          STRING_LOCATION_PAIR(MethodDecl, getExceptionSpecSourceRange()),
-          STRING_LOCATION_PAIR(MethodDecl, getParametersSourceRange()),
-          STRING_LOCATION_PAIR(MethodDecl, getReturnTypeSourceRange()),
-          STRING_LOCATION_PAIR(MethodDecl, getSourceRange())));
+  llvm::sort(ExpectedRanges, [](const auto &LHS, const auto &RHS) {
+    return LHS.first < RHS.first;
+  });
+
+  // clang-format off
+  EXPECT_EQ(
+            llvm::makeArrayRef(ExpectedRanges),
+      (ArrayRef<std::pair<std::string, SourceRange>>{
+STRING_LOCATION_STDPAIR(MethodDecl, getExceptionSpecSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getParametersSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getReturnTypeSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getExceptionSpecRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getParensRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getLocalSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getNextTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getNextTypeLoc().getSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getAs<clang::FunctionTypeLoc>().getReturnLoc().getSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getAs<clang::ElaboratedTypeLoc>().getNamedTypeLoc().getSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getSourceRange()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getSourceRange())
+  }));
+  // clang-format on
 }
 
 TEST(Introspection, SourceLocations_NNS) {
@@ -244,17 +305,25 @@ void ns::A::foo() {}
 
   EXPECT_THAT(
       ExpectedLocations,
-      UnorderedElementsAre(STRING_LOCATION_PAIR(NNS, getBeginLoc()),
-                           STRING_LOCATION_PAIR(NNS, getEndLoc()),
-                           STRING_LOCATION_PAIR(NNS, getLocalBeginLoc()),
-                           STRING_LOCATION_PAIR(NNS, getLocalEndLoc())));
+      UnorderedElementsAre(
+          STRING_LOCATION_PAIR(NNS, getBeginLoc()),
+          STRING_LOCATION_PAIR(NNS, getEndLoc()),
+          STRING_LOCATION_PAIR(NNS, getLocalBeginLoc()),
+          STRING_LOCATION_PAIR(NNS, getLocalEndLoc()),
+          STRING_LOCATION_PAIR(
+              NNS, getTypeLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc()),
+          STRING_LOCATION_PAIR(NNS, getTypeLoc().getBeginLoc()),
+          STRING_LOCATION_PAIR(NNS, getTypeLoc().getEndLoc())));
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
   EXPECT_THAT(
       ExpectedRanges,
-      UnorderedElementsAre(STRING_LOCATION_PAIR(NNS, getLocalSourceRange()),
-                           STRING_LOCATION_PAIR(NNS, getSourceRange())));
+      UnorderedElementsAre(
+          STRING_LOCATION_PAIR(NNS, getLocalSourceRange()),
+          STRING_LOCATION_PAIR(NNS, getSourceRange()),
+          STRING_LOCATION_PAIR(NNS, getTypeLoc().getSourceRange()),
+          STRING_LOCATION_PAIR(NNS, getTypeLoc().getLocalSourceRange())));
 }
 
 TEST(Introspection, SourceLocations_TA_Type) {
@@ -288,13 +357,31 @@ void foo()
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
+  // clang-format off
   EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(TA, getLocation())));
+              UnorderedElementsAre(
+STRING_LOCATION_PAIR(TA, getLocation()),
+STRING_LOCATION_PAIR(TA,
+  getTypeSourceInfo()->getTypeLoc().getAs<clang::BuiltinTypeLoc>().getBuiltinLoc()),
+STRING_LOCATION_PAIR(TA,
+  getTypeSourceInfo()->getTypeLoc().getAs<clang::BuiltinTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(
+    TA, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_PAIR(
+    TA, getTypeSourceInfo()->getTypeLoc().getEndLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(TA, getSourceRange())));
+  EXPECT_THAT(
+      ExpectedRanges,
+      UnorderedElementsAre(
+          STRING_LOCATION_PAIR(TA, getSourceRange()),
+          STRING_LOCATION_PAIR(
+              TA, getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+          STRING_LOCATION_PAIR(
+              TA, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange())));
 }
 
 TEST(Introspection, SourceLocations_TA_Decl) {
@@ -546,13 +633,31 @@ void foo()
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
+  // clang-format off
   EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(TA, getLocation())));
+              UnorderedElementsAre(
+STRING_LOCATION_PAIR(TA, getLocation()),
+STRING_LOCATION_PAIR(TA,
+  getTypeSourceInfo()->getTypeLoc().getAs<clang::BuiltinTypeLoc>().getBuiltinLoc()),
+STRING_LOCATION_PAIR(TA,
+  getTypeSourceInfo()->getTypeLoc().getAs<clang::BuiltinTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(
+    TA, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_PAIR(
+    TA, getTypeSourceInfo()->getTypeLoc().getEndLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(TA, getSourceRange())));
+  EXPECT_THAT(
+      ExpectedRanges,
+      UnorderedElementsAre(
+          STRING_LOCATION_PAIR(TA, getSourceRange()),
+          STRING_LOCATION_PAIR(
+              TA, getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+          STRING_LOCATION_PAIR(
+              TA, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange())));
 }
 
 TEST(Introspection, SourceLocations_CXXCtorInitializer_base) {
@@ -585,16 +690,34 @@ struct B : A {
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
-  EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(
-                  STRING_LOCATION_PAIR(CtorInit, getLParenLoc()),
-                  STRING_LOCATION_PAIR(CtorInit, getRParenLoc()),
-                  STRING_LOCATION_PAIR(CtorInit, getSourceLocation())));
+  // clang-format off
+  EXPECT_THAT(
+      ExpectedLocations,
+      UnorderedElementsAre(
+STRING_LOCATION_PAIR(CtorInit, getBaseClassLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(CtorInit, getBaseClassLoc().getBeginLoc()),
+STRING_LOCATION_PAIR(CtorInit, getBaseClassLoc().getEndLoc()),
+STRING_LOCATION_PAIR(CtorInit, getLParenLoc()),
+STRING_LOCATION_PAIR(CtorInit, getRParenLoc()),
+STRING_LOCATION_PAIR(CtorInit, getSourceLocation()),
+STRING_LOCATION_PAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_PAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getEndLoc())
+ ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  CtorInit, getSourceRange())));
+  // clang-format off
+  EXPECT_THAT(
+      ExpectedRanges,
+      UnorderedElementsAre(
+  STRING_LOCATION_PAIR(CtorInit, getBaseClassLoc().getLocalSourceRange()),
+  STRING_LOCATION_PAIR(CtorInit, getBaseClassLoc().getSourceRange()),
+  STRING_LOCATION_PAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange()),
+  STRING_LOCATION_PAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+  STRING_LOCATION_PAIR(CtorInit, getSourceRange())));
+  // clang-format on
 }
 
 TEST(Introspection, SourceLocations_CXXCtorInitializer_member) {
@@ -666,16 +789,33 @@ struct C {
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
-  EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(
-                  STRING_LOCATION_PAIR(CtorInit, getLParenLoc()),
-                  STRING_LOCATION_PAIR(CtorInit, getRParenLoc()),
-                  STRING_LOCATION_PAIR(CtorInit, getSourceLocation())));
+  // clang-format off
+  EXPECT_THAT(
+      ExpectedLocations,
+      UnorderedElementsAre(
+STRING_LOCATION_PAIR(CtorInit, getLParenLoc()),
+STRING_LOCATION_PAIR(CtorInit, getRParenLoc()),
+STRING_LOCATION_PAIR(CtorInit, getSourceLocation()),
+STRING_LOCATION_PAIR(CtorInit,
+                     getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_PAIR(CtorInit,
+                     getTypeSourceInfo()->getTypeLoc().getEndLoc()),
+STRING_LOCATION_PAIR(CtorInit,
+  getTypeSourceInfo()->getTypeLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  CtorInit, getSourceRange())));
+  EXPECT_THAT(
+      ExpectedRanges,
+      UnorderedElementsAre(
+          STRING_LOCATION_PAIR(CtorInit, getSourceRange()),
+          STRING_LOCATION_PAIR(
+              CtorInit,
+              getTypeSourceInfo()->getTypeLoc().getLocalSourceRange()),
+          STRING_LOCATION_PAIR(
+              CtorInit, getTypeSourceInfo()->getTypeLoc().getSourceRange())));
 }
 
 TEST(Introspection, SourceLocations_CXXCtorInitializer_pack) {
@@ -711,18 +851,44 @@ struct D : Templ<T...> {
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
-  EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(
-                  STRING_LOCATION_PAIR(CtorInit, getEllipsisLoc()),
-                  STRING_LOCATION_PAIR(CtorInit, getLParenLoc()),
-                  STRING_LOCATION_PAIR(CtorInit, getMemberLocation()),
-                  STRING_LOCATION_PAIR(CtorInit, getRParenLoc()),
-                  STRING_LOCATION_PAIR(CtorInit, getSourceLocation())));
+  llvm::sort(ExpectedLocations);
+
+  // clang-format off
+  EXPECT_EQ(
+     llvm::makeArrayRef(ExpectedLocations),
+      (ArrayRef<std::pair<std::string, SourceLocation>>{
+STRING_LOCATION_STDPAIR(CtorInit, getBaseClassLoc().getAs<clang::TemplateSpecializationTypeLoc>().getLAngleLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getBaseClassLoc().getAs<clang::TemplateSpecializationTypeLoc>().getRAngleLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getBaseClassLoc().getAs<clang::TemplateSpecializationTypeLoc>().getTemplateNameLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getBaseClassLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getBaseClassLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getEllipsisLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getLParenLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getMemberLocation()),
+STRING_LOCATION_STDPAIR(CtorInit, getRParenLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getSourceLocation()),
+STRING_LOCATION_STDPAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getLAngleLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getRAngleLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getTemplateNameLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(CtorInit, getTypeSourceInfo()->getTypeLoc().getEndLoc())
+  }));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  CtorInit, getSourceRange())));
+  EXPECT_THAT(
+      ExpectedRanges,
+      UnorderedElementsAre(
+          STRING_LOCATION_PAIR(CtorInit,
+                               getBaseClassLoc().getLocalSourceRange()),
+          STRING_LOCATION_PAIR(CtorInit, getBaseClassLoc().getSourceRange()),
+          STRING_LOCATION_PAIR(CtorInit, getSourceRange()),
+          STRING_LOCATION_PAIR(
+              CtorInit,
+              getTypeSourceInfo()->getTypeLoc().getLocalSourceRange()),
+          STRING_LOCATION_PAIR(
+              CtorInit, getTypeSourceInfo()->getTypeLoc().getSourceRange())));
 }
 
 TEST(Introspection, SourceLocations_CXXBaseSpecifier_plain) {
@@ -751,15 +917,27 @@ class B : A {};
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
+  // clang-format off
   EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
-                                   STRING_LOCATION_PAIR(Base, getBeginLoc()),
-                                   STRING_LOCATION_PAIR(Base, getEndLoc())));
+              UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
+STRING_LOCATION_PAIR(Base, getBeginLoc()),
+STRING_LOCATION_PAIR(Base, getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getBeginLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  Base, getSourceRange())));
+  // clang-format off
+  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange())
+    ));
+  // clang-format on
 }
 
 TEST(Introspection, SourceLocations_CXXBaseSpecifier_accessspec) {
@@ -788,15 +966,27 @@ class B : public A {};
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
+  // clang-format off
   EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
-                                   STRING_LOCATION_PAIR(Base, getBeginLoc()),
-                                   STRING_LOCATION_PAIR(Base, getEndLoc())));
+              UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
+STRING_LOCATION_PAIR(Base, getBeginLoc()),
+STRING_LOCATION_PAIR(Base, getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getBeginLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  Base, getSourceRange())));
+  // clang-format off
+  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getSourceRange())
+  ));
+  // clang-format on
 }
 
 TEST(Introspection, SourceLocations_CXXBaseSpecifier_virtual) {
@@ -826,15 +1016,27 @@ class C : virtual B, A {};
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
+  // clang-format off
   EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
-                                   STRING_LOCATION_PAIR(Base, getBeginLoc()),
-                                   STRING_LOCATION_PAIR(Base, getEndLoc())));
+              UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
+STRING_LOCATION_PAIR(Base, getBeginLoc()),
+STRING_LOCATION_PAIR(Base, getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getEndLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  Base, getSourceRange())));
+  // clang-format off
+  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange())
+  ));
+  // clang-format on
 }
 
 TEST(Introspection, SourceLocations_CXXBaseSpecifier_template_base) {
@@ -864,15 +1066,29 @@ class B : A<int, bool> {};
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
+  // clang-format off
   EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
-                                   STRING_LOCATION_PAIR(Base, getBeginLoc()),
-                                   STRING_LOCATION_PAIR(Base, getEndLoc())));
+              UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
+STRING_LOCATION_PAIR(Base, getBeginLoc()),
+STRING_LOCATION_PAIR(Base, getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getTemplateNameLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getLAngleLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>().getRAngleLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  Base, getSourceRange())));
+  // clang-format off
+  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange())
+  ));
+  // clang-format on
 }
 
 TEST(Introspection, SourceLocations_CXXBaseSpecifier_pack) {
@@ -903,14 +1119,203 @@ struct Templ : T... {
   auto ExpectedLocations =
       FormatExpected<SourceLocation>(Result.LocationAccessors);
 
+  // clang-format off
   EXPECT_THAT(ExpectedLocations,
-              UnorderedElementsAre(STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
-                                   STRING_LOCATION_PAIR(Base, getEllipsisLoc()),
-                                   STRING_LOCATION_PAIR(Base, getBeginLoc()),
-                                   STRING_LOCATION_PAIR(Base, getEndLoc())));
+              UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getBaseTypeLoc()),
+STRING_LOCATION_PAIR(Base, getEllipsisLoc()),
+STRING_LOCATION_PAIR(Base, getBeginLoc()),
+STRING_LOCATION_PAIR(Base, getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getEndLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getAs<clang::TypeSpecTypeLoc>().getNameLoc()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getBeginLoc())
+  ));
+  // clang-format on
 
   auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
 
-  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(STRING_LOCATION_PAIR(
-                                  Base, getSourceRange())));
+  // clang-format off
+  EXPECT_THAT(ExpectedRanges, UnorderedElementsAre(
+STRING_LOCATION_PAIR(Base, getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+STRING_LOCATION_PAIR(Base, getTypeSourceInfo()->getTypeLoc().getLocalSourceRange())
+  ));
+  // clang-format on
+}
+
+TEST(Introspection, SourceLocations_FunctionProtoTypeLoc) {
+  auto AST =
+      buildASTFromCode(R"cpp(
+int foo();
+)cpp",
+                       "foo.cpp", std::make_shared<PCHContainerOperations>());
+  auto &Ctx = AST->getASTContext();
+  auto &TU = *Ctx.getTranslationUnitDecl();
+
+  auto BoundNodes = ast_matchers::match(
+      decl(hasDescendant(loc(functionProtoType()).bind("tl"))), TU, Ctx);
+
+  EXPECT_EQ(BoundNodes.size(), 1u);
+
+  const auto *TL = BoundNodes[0].getNodeAs<TypeLoc>("tl");
+  auto Result = NodeIntrospection::GetLocations(*TL);
+
+  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
+    return;
+  }
+
+  auto ExpectedLocations =
+      FormatExpected<SourceLocation>(Result.LocationAccessors);
+
+  llvm::sort(ExpectedLocations);
+
+  // clang-format off
+  EXPECT_EQ(
+      llvm::makeArrayRef(ExpectedLocations),
+          (ArrayRef<std::pair<std::string, SourceLocation>>{
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getLParenLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getLocalRangeBegin()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getLocalRangeEnd()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getRParenLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::BuiltinTypeLoc>().getBuiltinLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getReturnLoc().getAs<clang::BuiltinTypeLoc>().getNameLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getReturnLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::FunctionTypeLoc>().getReturnLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(TL, getBeginLoc()),
+STRING_LOCATION_STDPAIR(TL, getEndLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getAs<clang::BuiltinTypeLoc>().getBuiltinLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getAs<clang::BuiltinTypeLoc>().getNameLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getEndLoc())
+        }));
+  // clang-format on
+
+  auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
+
+  // clang-format off
+  EXPECT_THAT(
+      ExpectedRanges,
+      UnorderedElementsAre(
+STRING_LOCATION_PAIR(TL, getAs<clang::FunctionTypeLoc>().getParensRange()),
+STRING_LOCATION_PAIR(TL, getAs<clang::FunctionTypeLoc>().getReturnLoc().getLocalSourceRange()),
+STRING_LOCATION_PAIR(TL, getAs<clang::FunctionTypeLoc>().getReturnLoc().getSourceRange()),
+STRING_LOCATION_PAIR(TL, getLocalSourceRange()),
+STRING_LOCATION_PAIR(TL, getNextTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_PAIR(TL, getNextTypeLoc().getSourceRange()),
+STRING_LOCATION_PAIR(TL, getSourceRange())
+          ));
+  // clang-format on
+}
+
+TEST(Introspection, SourceLocations_PointerTypeLoc) {
+  auto AST =
+      buildASTFromCode(R"cpp(
+int* i;
+)cpp",
+                       "foo.cpp", std::make_shared<PCHContainerOperations>());
+  auto &Ctx = AST->getASTContext();
+  auto &TU = *Ctx.getTranslationUnitDecl();
+
+  auto BoundNodes = ast_matchers::match(
+      decl(hasDescendant(
+          varDecl(hasName("i"), hasDescendant(loc(pointerType()).bind("tl"))))),
+      TU, Ctx);
+
+  EXPECT_EQ(BoundNodes.size(), 1u);
+
+  const auto *TL = BoundNodes[0].getNodeAs<TypeLoc>("tl");
+  auto Result = NodeIntrospection::GetLocations(*TL);
+
+  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
+    return;
+  }
+
+  auto ExpectedLocations =
+      FormatExpected<SourceLocation>(Result.LocationAccessors);
+
+  llvm::sort(ExpectedLocations);
+
+  // clang-format off
+  EXPECT_EQ(
+      llvm::makeArrayRef(ExpectedLocations),
+      (ArrayRef<std::pair<std::string, SourceLocation>>{
+STRING_LOCATION_STDPAIR(TL, getAs<clang::PointerTypeLoc>().getPointeeLoc().getAs<clang::BuiltinTypeLoc>().getBuiltinLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::PointerTypeLoc>().getPointeeLoc().getAs<clang::BuiltinTypeLoc>().getNameLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::PointerTypeLoc>().getPointeeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::PointerTypeLoc>().getPointeeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::PointerTypeLoc>().getSigilLoc()),
+STRING_LOCATION_STDPAIR(TL, getAs<clang::PointerTypeLoc>().getStarLoc()),
+STRING_LOCATION_STDPAIR(TL, getBeginLoc()),
+STRING_LOCATION_STDPAIR(TL, getEndLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getAs<clang::BuiltinTypeLoc>().getBuiltinLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getAs<clang::BuiltinTypeLoc>().getNameLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(TL, getNextTypeLoc().getEndLoc())
+}));
+  // clang-format on
+
+  auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
+
+  // clang-format off
+  EXPECT_THAT(
+      ExpectedRanges,
+      UnorderedElementsAre(
+STRING_LOCATION_PAIR(TL, getAs<clang::PointerTypeLoc>().getPointeeLoc().getLocalSourceRange()),
+STRING_LOCATION_PAIR(TL, getAs<clang::PointerTypeLoc>().getPointeeLoc().getSourceRange()),
+STRING_LOCATION_PAIR(TL, getLocalSourceRange()),
+STRING_LOCATION_PAIR(TL, getNextTypeLoc().getLocalSourceRange()),
+STRING_LOCATION_PAIR(TL, getNextTypeLoc().getSourceRange()),
+STRING_LOCATION_PAIR(TL, getSourceRange())
+          ));
+  // clang-format on
+}
+
+#ifndef _WIN32
+// This test doesn't work on windows due to use of the typeof extension.
+TEST(Introspection, SourceLocations_TypeOfTypeLoc) {
+  auto AST =
+      buildASTFromCode(R"cpp(
+typeof (static_cast<void *>(0)) i;
+)cpp",
+                       "foo.cpp", std::make_shared<PCHContainerOperations>());
+  auto &Ctx = AST->getASTContext();
+  auto &TU = *Ctx.getTranslationUnitDecl();
+
+  auto BoundNodes = ast_matchers::match(
+      decl(hasDescendant(
+          varDecl(hasName("i"), hasDescendant(loc(type()).bind("tl"))))),
+      TU, Ctx);
+
+  EXPECT_EQ(BoundNodes.size(), 1u);
+
+  const auto *TL = BoundNodes[0].getNodeAs<TypeLoc>("tl");
+  auto Result = NodeIntrospection::GetLocations(*TL);
+
+  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
+    return;
+  }
+
+  auto ExpectedLocations =
+      FormatExpected<SourceLocation>(Result.LocationAccessors);
+
+  EXPECT_THAT(ExpectedLocations,
+              UnorderedElementsAre(
+                  STRING_LOCATION_PAIR(TL, getBeginLoc()),
+                  STRING_LOCATION_PAIR(TL, getEndLoc()),
+                  STRING_LOCATION_PAIR(
+                      TL, getAs<clang::TypeOfExprTypeLoc>().getTypeofLoc()),
+                  STRING_LOCATION_PAIR(
+                      TL, getAs<clang::TypeOfExprTypeLoc>().getLParenLoc()),
+                  STRING_LOCATION_PAIR(
+                      TL, getAs<clang::TypeOfExprTypeLoc>().getRParenLoc())));
+
+  auto ExpectedRanges = FormatExpected<SourceRange>(Result.RangeAccessors);
+
+  EXPECT_THAT(ExpectedRanges,
+              UnorderedElementsAre(
+                  STRING_LOCATION_PAIR(TL, getLocalSourceRange()),
+                  STRING_LOCATION_PAIR(TL, getSourceRange()),
+                  STRING_LOCATION_PAIR(
+                      TL, getAs<clang::TypeOfExprTypeLoc>().getParensRange())));
 }
+#endif

From 1296af18c4254fa1d291474d2317422c25e26216 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Thu, 8 Jul 2021 01:22:21 +0900
Subject: [PATCH 49/52] [VE] Revert SelectionDAGNodes.h

Revert local modifications on SelectionDAGNodes.h caused by clang-format.
---
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 534 ++++++++++--------
 1 file changed, 296 insertions(+), 238 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index e9dcd6cb6371..8359ca97a1cb 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -83,7 +83,7 @@ struct SDVTList {
 
 namespace ISD {
 
-/// Node predicates
+  /// Node predicates
 
 /// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
 /// same constant or undefined, return true and return the constant value in
@@ -159,13 +159,19 @@ class SDValue {
   bool operator==(const SDValue &O) const {
     return Node == O.Node && ResNo == O.ResNo;
   }
-  bool operator!=(const SDValue &O) const { return !operator==(O); }
+  bool operator!=(const SDValue &O) const {
+    return !operator==(O);
+  }
   bool operator<(const SDValue &O) const {
     return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
   }
-  explicit operator bool() const { return Node != nullptr; }
+  explicit operator bool() const {
+    return Node != nullptr;
+  }
 
-  SDValue getValue(unsigned R) const { return SDValue(Node, R); }
+  SDValue getValue(unsigned R) const {
+    return SDValue(Node, R);
+  }
 
   /// Return true if this node is an operand of N.
   bool isOperandOf(const SDNode *N) const;
@@ -174,14 +180,18 @@ class SDValue {
   inline EVT getValueType() const;
 
   /// Return the simple ValueType of the referenced return value.
-  MVT getSimpleValueType() const { return getValueType().getSimpleVT(); }
+  MVT getSimpleValueType() const {
+    return getValueType().getSimpleVT();
+  }
 
   /// Returns the size of the value in bits.
   ///
   /// If the value type is a scalable vector type, the scalable property will
   /// be set and the runtime size will be a positive integer multiple of the
   /// base size.
-  TypeSize getValueSizeInBits() const { return getValueType().getSizeInBits(); }
+  TypeSize getValueSizeInBits() const {
+    return getValueType().getSizeInBits();
+  }
 
   uint64_t getScalarValueSizeInBits() const {
     return getValueType().getScalarType().getFixedSizeInBits();
@@ -209,7 +219,8 @@ class SDValue {
   /// In practice, this looks through token factors and non-volatile loads.
   /// In order to remain efficient, this only
   /// looks a couple of nodes in, it does not do an exhaustive search.
-  bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth = 2) const;
+  bool reachesChainWithoutSideEffects(SDValue Dest,
+                                      unsigned Depth = 2) const;
 
   /// Return true if there are no nodes using value ResNo of Node.
   inline bool use_empty() const;
@@ -218,7 +229,7 @@ class SDValue {
   inline bool hasOneUse() const;
 };
 
-template <> struct DenseMapInfo<SDValue> {
+template<> struct DenseMapInfo<SDValue> {
   static inline SDValue getEmptyKey() {
     SDValue V;
     V.ResNo = -1U;
@@ -233,8 +244,7 @@ template <> struct DenseMapInfo<SDValue> {
 
   static unsigned getHashValue(const SDValue &Val) {
     return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
-            (unsigned)((uintptr_t)Val.getNode() >> 9)) +
-           Val.getResNo();
+            (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
   }
 
   static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
@@ -244,12 +254,14 @@ template <> struct DenseMapInfo<SDValue> {
 
 /// Allow casting operators to work directly on
 /// SDValues as if they were SDNode*'s.
-template <> struct simplify_type<SDValue> {
+template<> struct simplify_type<SDValue> {
   using SimpleType = SDNode *;
 
-  static SimpleType getSimplifiedValue(SDValue &Val) { return Val.getNode(); }
+  static SimpleType getSimplifiedValue(SDValue &Val) {
+    return Val.getNode();
+  }
 };
-template <> struct simplify_type<const SDValue> {
+template<> struct simplify_type<const SDValue> {
   using SimpleType = /*const*/ SDNode *;
 
   static SimpleType getSimplifiedValue(const SDValue &Val) {
@@ -278,7 +290,7 @@ class SDUse {
   SDUse &operator=(const SDUse &) = delete;
 
   /// Normally SDUse will just implicitly convert to an SDValue that it holds.
-  operator const SDValue &() const { return Val; }
+  operator const SDValue&() const { return Val; }
 
   /// If implicit conversion to SDValue doesn't work, the get() method returns
   /// the SDValue.
@@ -298,13 +310,19 @@ class SDUse {
   EVT getValueType() const { return Val.getValueType(); }
 
   /// Convenience function for get().operator==
-  bool operator==(const SDValue &V) const { return Val == V; }
+  bool operator==(const SDValue &V) const {
+    return Val == V;
+  }
 
   /// Convenience function for get().operator!=
-  bool operator!=(const SDValue &V) const { return Val != V; }
+  bool operator!=(const SDValue &V) const {
+    return Val != V;
+  }
 
   /// Convenience function for get().operator<
-  bool operator<(const SDValue &V) const { return Val < V; }
+  bool operator<(const SDValue &V) const {
+    return Val < V;
+  }
 
 private:
   friend class SelectionDAG;
@@ -326,25 +344,25 @@ class SDUse {
 
   void addToList(SDUse **List) {
     Next = *List;
-    if (Next)
-      Next->Prev = &Next;
+    if (Next) Next->Prev = &Next;
     Prev = List;
     *List = this;
   }
 
   void removeFromList() {
     *Prev = Next;
-    if (Next)
-      Next->Prev = Prev;
+    if (Next) Next->Prev = Prev;
   }
 };
 
 /// simplify_type specializations - Allow casting operators to work directly on
 /// SDValues as if they were SDNode*'s.
-template <> struct simplify_type<SDUse> {
+template<> struct simplify_type<SDUse> {
   using SimpleType = SDNode *;
 
-  static SimpleType getSimplifiedValue(SDUse &Val) { return Val.getNode(); }
+  static SimpleType getSimplifiedValue(SDUse &Val) {
+    return Val.getNode();
+  }
 };
 
 /// These are IR-level optimization flags that may be propagated to SDNodes.
@@ -456,7 +474,7 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
 #define END_TWO_BYTE_PACK()
 #endif
 
-  BEGIN_TWO_BYTE_PACK()
+BEGIN_TWO_BYTE_PACK()
   class SDNodeBitfields {
     friend class SDNode;
     friend class MemIntrinsicSDNode;
@@ -541,7 +559,7 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
     LoadSDNodeBitfields LoadSDNodeBits;
     StoreSDNodeBitfields StoreSDNodeBits;
   };
-  END_TWO_BYTE_PACK()
+END_TWO_BYTE_PACK()
 #undef BEGIN_TWO_BYTE_PACK
 #undef END_TWO_BYTE_PACK
 
@@ -605,7 +623,7 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
   /// pre-isel nodes (those for which isMachineOpcode returns false), these
   /// are the opcode values in the ISD and <target>ISD namespaces. For
   /// post-isel opcodes, see getMachineOpcode.
-  unsigned getOpcode() const { return (unsigned short)NodeType; }
+  unsigned getOpcode()  const { return (unsigned short)NodeType; }
 
   /// Test if this node has a target-specific opcode (in the
   /// \<target\>ISD namespace).
@@ -644,14 +662,14 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
   /// Test if this node is a strict floating point pseudo-op.
   bool isStrictFPOpcode() {
     switch (NodeType) {
-    default:
-      return false;
-    case ISD::STRICT_FP16_TO_FP:
-    case ISD::STRICT_FP_TO_FP16:
+      default:
+        return false;
+      case ISD::STRICT_FP16_TO_FP:
+      case ISD::STRICT_FP_TO_FP16:
 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
-  case ISD::STRICT_##DAGN:
+      case ISD::STRICT_##DAGN:
 #include "llvm/IR/ConstrainedOps.def"
-      return true;
+        return true;
     }
   }
 
@@ -769,23 +787,25 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
     use_iterator() = default;
     use_iterator(const use_iterator &I) : Op(I.Op) {}
 
-    bool operator==(const use_iterator &x) const { return Op == x.Op; }
-    bool operator!=(const use_iterator &x) const { return !operator==(x); }
+    bool operator==(const use_iterator &x) const {
+      return Op == x.Op;
+    }
+    bool operator!=(const use_iterator &x) const {
+      return !operator==(x);
+    }
 
     /// Return true if this iterator is at the end of uses list.
     bool atEnd() const { return Op == nullptr; }
 
     // Iterator traversal: forward iteration only.
-    use_iterator &operator++() { // Preincrement
+    use_iterator &operator++() {          // Preincrement
       assert(Op && "Cannot increment end iterator!");
       Op = Op->getNext();
       return *this;
     }
 
-    use_iterator operator++(int) { // Postincrement
-      use_iterator tmp = *this;
-      ++*this;
-      return tmp;
+    use_iterator operator++(int) {        // Postincrement
+      use_iterator tmp = *this; ++*this; return tmp;
     }
 
     /// Retrieve a pointer to the current user node.
@@ -806,7 +826,9 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
   };
 
   /// Provide iteration support to walk over all uses of an SDNode.
-  use_iterator use_begin() const { return use_iterator(UseList); }
+  use_iterator use_begin() const {
+    return use_iterator(UseList);
+  }
 
   static use_iterator use_end() { return use_iterator(nullptr); }
 
@@ -933,15 +955,17 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
   using op_iterator = SDUse *;
 
   op_iterator op_begin() const { return OperandList; }
-  op_iterator op_end() const { return OperandList + NumOperands; }
+  op_iterator op_end() const { return OperandList+NumOperands; }
   ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
 
   /// Iterator for directly iterating over the operand SDValue's.
   struct value_op_iterator
-      : iterator_adaptor_base<
-            value_op_iterator, op_iterator, std::random_access_iterator_tag,
-            SDValue, ptrdiff_t, value_op_iterator *, value_op_iterator *> {
-    explicit value_op_iterator(SDUse *U = nullptr) : iterator_adaptor_base(U) {}
+      : iterator_adaptor_base<value_op_iterator, op_iterator,
+                              std::random_access_iterator_tag, SDValue,
+                              ptrdiff_t, value_op_iterator *,
+                              value_op_iterator *> {
+    explicit value_op_iterator(SDUse *U = nullptr)
+      : iterator_adaptor_base(U) {}
 
     const SDValue &operator*() const { return I->get(); }
   };
@@ -952,7 +976,7 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
   }
 
   SDVTList getVTList() const {
-    SDVTList X = {ValueList, NumValues};
+    SDVTList X = { ValueList, NumValues };
     return X;
   }
 
@@ -960,8 +984,8 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
   /// to which the glue operand points. Otherwise return NULL.
   SDNode *getGluedNode() const {
     if (getNumOperands() != 0 &&
-        getOperand(getNumOperands() - 1).getValueType() == MVT::Glue)
-      return getOperand(getNumOperands() - 1).getNode();
+        getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
+      return getOperand(getNumOperands()-1).getNode();
     return nullptr;
   }
 
@@ -1007,14 +1031,14 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
   using value_iterator = const EVT *;
 
   value_iterator value_begin() const { return ValueList; }
-  value_iterator value_end() const { return ValueList + NumValues; }
+  value_iterator value_end() const { return ValueList+NumValues; }
   iterator_range<value_iterator> values() const {
     return llvm::make_range(value_begin(), value_end());
   }
 
   /// Return the opcode of this operation for printing.
   std::string getOperationName(const SelectionDAG *G = nullptr) const;
-  static const char *getIndexedModeName(ISD::MemIndexedMode AM);
+  static const char* getIndexedModeName(ISD::MemIndexedMode AM);
   void print_types(raw_ostream &OS, const SelectionDAG *G) const;
   void print_details(raw_ostream &OS, const SelectionDAG *G) const;
   void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
@@ -1075,7 +1099,7 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
 
 protected:
   static SDVTList getSDVTList(EVT VT) {
-    SDVTList Ret = {getValueTypeList(VT), 1};
+    SDVTList Ret = { getValueTypeList(VT), 1 };
     return Ret;
   }
 
@@ -1094,7 +1118,7 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
 
   /// Release the operands and set this node to have zero operands.
   void DropOperands();
-}; // namespace llvm
+};
 
 /// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
 /// into SDNode creation functions.
@@ -1136,9 +1160,13 @@ inline SDValue::SDValue(SDNode *node, unsigned resno)
   assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
 }
 
-inline unsigned SDValue::getOpcode() const { return Node->getOpcode(); }
+inline unsigned SDValue::getOpcode() const {
+  return Node->getOpcode();
+}
 
-inline EVT SDValue::getValueType() const { return Node->getValueType(ResNo); }
+inline EVT SDValue::getValueType() const {
+  return Node->getValueType(ResNo);
+}
 
 inline unsigned SDValue::getNumOperands() const {
   return Node->getNumOperands();
@@ -1156,19 +1184,25 @@ inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
   return Node->getConstantOperandAPInt(i);
 }
 
-inline bool SDValue::isTargetOpcode() const { return Node->isTargetOpcode(); }
+inline bool SDValue::isTargetOpcode() const {
+  return Node->isTargetOpcode();
+}
 
 inline bool SDValue::isTargetMemoryOpcode() const {
   return Node->isTargetMemoryOpcode();
 }
 
-inline bool SDValue::isMachineOpcode() const { return Node->isMachineOpcode(); }
+inline bool SDValue::isMachineOpcode() const {
+  return Node->isMachineOpcode();
+}
 
 inline unsigned SDValue::getMachineOpcode() const {
   return Node->getMachineOpcode();
 }
 
-inline bool SDValue::isUndef() const { return Node->isUndef(); }
+inline bool SDValue::isUndef() const {
+  return Node->isUndef();
+}
 
 inline bool SDValue::use_empty() const {
   return !Node->hasAnyUseOfValue(ResNo);
@@ -1182,11 +1216,17 @@ inline const DebugLoc &SDValue::getDebugLoc() const {
   return Node->getDebugLoc();
 }
 
-inline void SDValue::dump() const { return Node->dump(); }
+inline void SDValue::dump() const {
+  return Node->dump();
+}
 
-inline void SDValue::dump(const SelectionDAG *G) const { return Node->dump(G); }
+inline void SDValue::dump(const SelectionDAG *G) const {
+  return Node->dump(G);
+}
 
-inline void SDValue::dumpr() const { return Node->dumpr(); }
+inline void SDValue::dumpr() const {
+  return Node->dumpr();
+}
 
 inline void SDValue::dumpr(const SelectionDAG *G) const {
   return Node->dumpr(G);
@@ -1195,11 +1235,9 @@ inline void SDValue::dumpr(const SelectionDAG *G) const {
 // Define inline functions from the SDUse class.
 
 inline void SDUse::set(const SDValue &V) {
-  if (Val.getNode())
-    removeFromList();
+  if (Val.getNode()) removeFromList();
   Val = V;
-  if (V.getNode())
-    V.getNode()->addUse(*this);
+  if (V.getNode()) V.getNode()->addUse(*this);
 }
 
 inline void SDUse::setInitial(const SDValue &V) {
@@ -1208,11 +1246,9 @@ inline void SDUse::setInitial(const SDValue &V) {
 }
 
 inline void SDUse::setNode(SDNode *N) {
-  if (Val.getNode())
-    removeFromList();
+  if (Val.getNode()) removeFromList();
   Val.setNode(N);
-  if (N)
-    N->addUse(*this);
+  if (N) N->addUse(*this);
 }
 
 /// This class is used to form a handle around another node that
@@ -1224,7 +1260,7 @@ class HandleSDNode : public SDNode {
 
 public:
   explicit HandleSDNode(SDValue X)
-      : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
+    : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
     // HandleSDNodes are never inserted into the DAG, so they won't be
     // auto-numbered. Use ID 65535 as a sentinel.
     PersistentId = 0xffff;
@@ -1351,7 +1387,9 @@ class MemSDNode : public SDNode {
   }
 
   /// Return the address space for the associated pointer
-  unsigned getAddressSpace() const { return getPointerInfo().getAddrSpace(); }
+  unsigned getAddressSpace() const {
+    return getPointerInfo().getAddrSpace();
+  }
 
   /// Update this MemSDNode's MachineMemOperand information
   /// to reflect the alignment of NewMMO, if it has a greater alignment.
@@ -1382,31 +1420,36 @@ class MemSDNode : public SDNode {
   static bool classof(const SDNode *N) {
     // For some targets, we lower some target intrinsics to a MemIntrinsicNode
     // with either an intrinsic or a target opcode.
-    return N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE ||
-           N->getOpcode() == ISD::PREFETCH ||
-           N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
+    return N->getOpcode() == ISD::LOAD                ||
+           N->getOpcode() == ISD::STORE               ||
+           N->getOpcode() == ISD::PREFETCH            ||
+           N->getOpcode() == ISD::ATOMIC_CMP_SWAP     ||
            N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
-           N->getOpcode() == ISD::ATOMIC_SWAP ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
-           N->getOpcode() == ISD::ATOMIC_LOAD ||
-           N->getOpcode() == ISD::ATOMIC_STORE ||
-           N->getOpcode() == ISD::MLOAD || N->getOpcode() == ISD::MSTORE ||
-           N->getOpcode() == ISD::MGATHER || N->getOpcode() == ISD::MSCATTER ||
-           N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE ||
-           N->getOpcode() == ISD::VP_GATHER ||
-           N->getOpcode() == ISD::VP_SCATTER || N->isMemIntrinsic() ||
+           N->getOpcode() == ISD::ATOMIC_SWAP         ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_ADD     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_SUB     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_AND     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_CLR     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_OR      ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_XOR     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_NAND    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MIN     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MAX     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FADD    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD         ||
+           N->getOpcode() == ISD::ATOMIC_STORE        ||
+           N->getOpcode() == ISD::MLOAD               ||
+           N->getOpcode() == ISD::MSTORE              ||
+           N->getOpcode() == ISD::MGATHER             ||
+           N->getOpcode() == ISD::MSCATTER            ||
+           N->getOpcode() == ISD::VP_LOAD             ||
+           N->getOpcode() == ISD::VP_STORE            ||
+           N->getOpcode() == ISD::VP_GATHER           ||
+           N->getOpcode() == ISD::VP_SCATTER          ||
+           N->isMemIntrinsic()                        ||
            N->isTargetMemoryOpcode();
   }
 };
@@ -1416,10 +1459,9 @@ class AtomicSDNode : public MemSDNode {
 public:
   AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
                EVT MemVT, MachineMemOperand *MMO)
-      : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
     assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
-            MMO->isAtomic()) &&
-           "then why are we using an AtomicSDNode?");
+            MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
   }
 
   const SDValue &getBasePtr() const { return getOperand(1); }
@@ -1442,23 +1484,23 @@ class AtomicSDNode : public MemSDNode {
 
   // Methods to support isa and dyn_cast
   static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
+    return N->getOpcode() == ISD::ATOMIC_CMP_SWAP     ||
            N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
-           N->getOpcode() == ISD::ATOMIC_SWAP ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
-           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
-           N->getOpcode() == ISD::ATOMIC_LOAD ||
+           N->getOpcode() == ISD::ATOMIC_SWAP         ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_ADD     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_SUB     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_AND     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_CLR     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_OR      ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_XOR     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_NAND    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MIN     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MAX     ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FADD    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD         ||
            N->getOpcode() == ISD::ATOMIC_STORE;
   }
 };
@@ -1479,7 +1521,8 @@ class MemIntrinsicSDNode : public MemSDNode {
   static bool classof(const SDNode *N) {
     // We lower some target intrinsics to their target opcode
     // early a node with a target opcode can be of this class
-    return N->isMemIntrinsic() || N->getOpcode() == ISD::PREFETCH ||
+    return N->isMemIntrinsic()             ||
+           N->getOpcode() == ISD::PREFETCH ||
            N->isTargetMemoryOpcode();
   }
 };
@@ -1604,7 +1647,7 @@ class ConstantFPSDNode : public SDNode {
         Value(val) {}
 
 public:
-  const APFloat &getValueAPF() const { return Value->getValueAPF(); }
+  const APFloat& getValueAPF() const { return Value->getValueAPF(); }
   const ConstantFP *getConstantFPValue() const { return Value; }
 
   /// Return true if the value is positive or negative zero.
@@ -1630,9 +1673,9 @@ class ConstantFPSDNode : public SDNode {
   bool isExactlyValue(double V) const {
     return Value->getValueAPF().isExactlyValue(V);
   }
-  bool isExactlyValue(const APFloat &V) const;
+  bool isExactlyValue(const APFloat& V) const;
 
-  static bool isValueValidForType(EVT VT, const APFloat &Val);
+  static bool isValueValidForType(EVT VT, const APFloat& Val);
 
   static bool classof(const SDNode *N) {
     return N->getOpcode() == ISD::ConstantFP ||
@@ -1714,7 +1757,8 @@ class GlobalAddressSDNode : public SDNode {
   unsigned TargetFlags;
 
   GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
-                      const GlobalValue *GA, EVT VT, int64_t o, unsigned TF);
+                      const GlobalValue *GA, EVT VT, int64_t o,
+                      unsigned TF);
 
 public:
   const GlobalValue *getGlobal() const { return TheGlobal; }
@@ -1737,9 +1781,9 @@ class FrameIndexSDNode : public SDNode {
   int FI;
 
   FrameIndexSDNode(int fi, EVT VT, bool isTarg)
-      : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex, 0, DebugLoc(),
-               getSDVTList(VT)),
-        FI(fi) {}
+    : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
+      0, DebugLoc(), getSDVTList(VT)), FI(fi) {
+  }
 
 public:
   int getIndex() const { return FI; }
@@ -1760,7 +1804,6 @@ class LifetimeSDNode : public SDNode {
   LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                  SDVTList VTs, int64_t Size, int64_t Offset)
       : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
-
 public:
   int64_t getFrameIndex() const {
     return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
@@ -1816,9 +1859,9 @@ class JumpTableSDNode : public SDNode {
   unsigned TargetFlags;
 
   JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
-      : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, 0, DebugLoc(),
-               getSDVTList(VT)),
-        JTI(jti), TargetFlags(TF) {}
+    : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
+      0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
+  }
 
 public:
   int getIndex() const { return JTI; }
@@ -1857,11 +1900,13 @@ class ConstantPoolSDNode : public SDNode {
         Offset(o), Alignment(Alignment), TargetFlags(TF) {
     assert(Offset >= 0 && "Offset is too large");
     Val.MachineCPVal = v;
-    Offset |= 1 << (sizeof(unsigned) * CHAR_BIT - 1);
+    Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
   }
 
 public:
-  bool isMachineConstantPoolEntry() const { return Offset < 0; }
+  bool isMachineConstantPoolEntry() const {
+    return Offset < 0;
+  }
 
   const Constant *getConstVal() const {
     assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
@@ -1874,7 +1919,7 @@ class ConstantPoolSDNode : public SDNode {
   }
 
   int getOffset() const {
-    return Offset & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1));
+    return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
   }
 
   // Return the alignment of this constant pool object, which is either 0 (for
@@ -1921,8 +1966,8 @@ class BasicBlockSDNode : public SDNode {
   /// blocks out of order when they're jumped to, which makes it a bit
   /// harder.  Let's see if we need it first.
   explicit BasicBlockSDNode(MachineBasicBlock *mbb)
-      : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)),
-        MBB(mbb) {}
+    : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
+  {}
 
 public:
   MachineBasicBlock *getBasicBlock() const { return MBB; }
@@ -2056,7 +2101,7 @@ class SrcValueSDNode : public SDNode {
 
   /// Create a SrcValue for a general value.
   explicit SrcValueSDNode(const Value *v)
-      : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
+    : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
 
 public:
   /// Return the contained Value.
@@ -2073,8 +2118,8 @@ class MDNodeSDNode : public SDNode {
   const MDNode *MD;
 
   explicit MDNodeSDNode(const MDNode *md)
-      : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
-        MD(md) {}
+  : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
+  {}
 
 public:
   const MDNode *getMD() const { return MD; }
@@ -2090,7 +2135,7 @@ class RegisterSDNode : public SDNode {
   Register Reg;
 
   RegisterSDNode(Register reg, EVT VT)
-      : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
+    : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
 
 public:
   Register getReg() const { return Reg; }
@@ -2107,8 +2152,8 @@ class RegisterMaskSDNode : public SDNode {
   const uint32_t *RegMask;
 
   RegisterMaskSDNode(const uint32_t *mask)
-      : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
-        RegMask(mask) {}
+    : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
+      RegMask(mask) {}
 
 public:
   const uint32_t *getRegMask() const { return RegMask; }
@@ -2125,10 +2170,10 @@ class BlockAddressSDNode : public SDNode {
   int64_t Offset;
   unsigned TargetFlags;
 
-  BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, int64_t o,
-                     unsigned Flags)
-      : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), BA(ba), Offset(o),
-        TargetFlags(Flags) {}
+  BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
+                     int64_t o, unsigned Flags)
+    : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
+             BA(ba), Offset(o), TargetFlags(Flags) {}
 
 public:
   const BlockAddress *getBlockAddress() const { return BA; }
@@ -2203,8 +2248,8 @@ class CondCodeSDNode : public SDNode {
   ISD::CondCode Condition;
 
   explicit CondCodeSDNode(ISD::CondCode Cond)
-      : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
-        Condition(Cond) {}
+    : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
+      Condition(Cond) {}
 
 public:
   ISD::CondCode get() const { return Condition; }
@@ -2222,8 +2267,8 @@ class VTSDNode : public SDNode {
   EVT ValueType;
 
   explicit VTSDNode(EVT VT)
-      : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
-        ValueType(VT) {}
+    : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
+      ValueType(VT) {}
 
 public:
   EVT getVT() const { return ValueType; }
@@ -2261,7 +2306,8 @@ class LSBaseSDNode : public MemSDNode {
   bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
 
   static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE;
+    return N->getOpcode() == ISD::LOAD ||
+           N->getOpcode() == ISD::STORE;
   }
 };
 
@@ -2288,7 +2334,9 @@ class LoadSDNode : public LSBaseSDNode {
   const SDValue &getBasePtr() const { return getOperand(1); }
   const SDValue &getOffset() const { return getOperand(2); }
 
-  static bool classof(const SDNode *N) { return N->getOpcode() == ISD::LOAD; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::LOAD;
+  }
 };
 
 /// This class is used to represent ISD::STORE nodes.
@@ -2317,7 +2365,9 @@ class StoreSDNode : public LSBaseSDNode {
   const SDValue &getBasePtr() const { return getOperand(2); }
   const SDValue &getOffset() const { return getOperand(3); }
 
-  static bool classof(const SDNode *N) { return N->getOpcode() == ISD::STORE; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::STORE;
+  }
 };
 
 /// This base class is used to represent VP_LOAD and VP_STORE nodes
@@ -2443,7 +2493,8 @@ class MaskedLoadStoreSDNode : public MemSDNode {
   bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
 
   static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::MLOAD || N->getOpcode() == ISD::MSTORE;
+    return N->getOpcode() == ISD::MLOAD ||
+           N->getOpcode() == ISD::MSTORE;
   }
 };
 
@@ -2469,7 +2520,9 @@ class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
   const SDValue &getMask() const { return getOperand(3); }
   const SDValue &getPassThru() const { return getOperand(4); }
 
-  static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MLOAD;
+  }
 
   bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
 };
@@ -2503,7 +2556,9 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
   const SDValue &getOffset() const { return getOperand(3); }
   const SDValue &getMask() const { return getOperand(4); }
 
-  static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MSTORE; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MSTORE;
+  }
 };
 
 /// This is a base class used to represent
@@ -2630,12 +2685,13 @@ class MaskedGatherScatterSDNode : public MemSDNode {
   // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
   // Mask is a vector of i1 elements
   const SDValue &getBasePtr() const { return getOperand(3); }
-  const SDValue &getIndex() const { return getOperand(4); }
-  const SDValue &getMask() const { return getOperand(2); }
-  const SDValue &getScale() const { return getOperand(5); }
+  const SDValue &getIndex()   const { return getOperand(4); }
+  const SDValue &getMask()    const { return getOperand(2); }
+  const SDValue &getScale()   const { return getOperand(5); }
 
   static bool classof(const SDNode *N) {
-    return N->getOpcode() == ISD::MGATHER || N->getOpcode() == ISD::MSCATTER;
+    return N->getOpcode() == ISD::MGATHER ||
+           N->getOpcode() == ISD::MSCATTER;
   }
 };
 
@@ -2749,7 +2805,9 @@ class MachineSDNode : public SDNode {
     NumMemRefs = 0;
   }
 
-  static bool classof(const SDNode *N) { return N->isMachineOpcode(); }
+  static bool classof(const SDNode *N) {
+    return N->isMachineOpcode();
+  }
 };
 
 /// An SDNode that records if a register contains a value that is guaranteed to
@@ -2784,19 +2842,19 @@ class SDNodeIterator {
   bool operator==(const SDNodeIterator& x) const {
     return Operand == x.Operand;
   }
-  bool operator!=(const SDNodeIterator &x) const { return !operator==(x); }
+  bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
 
-  pointer operator*() const { return Node->getOperand(Operand).getNode(); }
+  pointer operator*() const {
+    return Node->getOperand(Operand).getNode();
+  }
   pointer operator->() const { return operator*(); }
 
-  SDNodeIterator &operator++() { // Preincrement
+  SDNodeIterator& operator++() {                // Preincrement
     ++Operand;
     return *this;
   }
   SDNodeIterator operator++(int) { // Postincrement
-    SDNodeIterator tmp = *this;
-    ++*this;
-    return tmp;
+    SDNodeIterator tmp = *this; ++*this; return tmp;
   }
   size_t operator-(SDNodeIterator Other) const {
     assert(Node == Other.Node &&
@@ -2805,7 +2863,7 @@ class SDNodeIterator {
   }
 
   static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
-  static SDNodeIterator end(const SDNode *N) {
+  static SDNodeIterator end  (const SDNode *N) {
     return SDNodeIterator(N, N->getNumOperands());
   }
 
@@ -2813,7 +2871,7 @@ class SDNodeIterator {
   const SDNode *getNode() const { return Node; }
 };
 
-template <> struct GraphTraits<SDNode *> {
+template <> struct GraphTraits<SDNode*> {
   using NodeRef = SDNode *;
   using ChildIteratorType = SDNodeIterator;
 
@@ -2842,91 +2900,91 @@ using MostAlignedSDNode = GlobalAddressSDNode;
 
 namespace ISD {
 
-/// Returns true if the specified node is a non-extending and unindexed load.
-inline bool isNormalLoad(const SDNode *N) {
-  const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
-  return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
-         Ld->getAddressingMode() == ISD::UNINDEXED;
-}
+  /// Returns true if the specified node is a non-extending and unindexed load.
+  inline bool isNormalLoad(const SDNode *N) {
+    const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
+    return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
+      Ld->getAddressingMode() == ISD::UNINDEXED;
+  }
 
-/// Returns true if the specified node is a non-extending load.
-inline bool isNON_EXTLoad(const SDNode *N) {
-  return isa<LoadSDNode>(N) &&
-         cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
-}
+  /// Returns true if the specified node is a non-extending load.
+  inline bool isNON_EXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+      cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+  }
 
-/// Returns true if the specified node is a EXTLOAD.
-inline bool isEXTLoad(const SDNode *N) {
-  return isa<LoadSDNode>(N) &&
-         cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
-}
+  /// Returns true if the specified node is a EXTLOAD.
+  inline bool isEXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+      cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
+  }
 
-/// Returns true if the specified node is a SEXTLOAD.
-inline bool isSEXTLoad(const SDNode *N) {
-  return isa<LoadSDNode>(N) &&
-         cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
-}
+  /// Returns true if the specified node is a SEXTLOAD.
+  inline bool isSEXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+      cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+  }
 
-/// Returns true if the specified node is a ZEXTLOAD.
-inline bool isZEXTLoad(const SDNode *N) {
-  return isa<LoadSDNode>(N) &&
-         cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
-}
+  /// Returns true if the specified node is a ZEXTLOAD.
+  inline bool isZEXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+      cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+  }
 
-/// Returns true if the specified node is an unindexed load.
-inline bool isUNINDEXEDLoad(const SDNode *N) {
-  return isa<LoadSDNode>(N) &&
-         cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
-}
+  /// Returns true if the specified node is an unindexed load.
+  inline bool isUNINDEXEDLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+      cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
+  }
 
-/// Returns true if the specified node is a non-truncating
-/// and unindexed store.
-inline bool isNormalStore(const SDNode *N) {
-  const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
-  return St && !St->isTruncatingStore() &&
-         St->getAddressingMode() == ISD::UNINDEXED;
-}
+  /// Returns true if the specified node is a non-truncating
+  /// and unindexed store.
+  inline bool isNormalStore(const SDNode *N) {
+    const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
+    return St && !St->isTruncatingStore() &&
+      St->getAddressingMode() == ISD::UNINDEXED;
+  }
 
-/// Returns true if the specified node is a non-truncating store.
-inline bool isNON_TRUNCStore(const SDNode *N) {
-  return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
-}
+  /// Returns true if the specified node is a non-truncating store.
+  inline bool isNON_TRUNCStore(const SDNode *N) {
+    return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
+  }
 
-/// Returns true if the specified node is a truncating store.
-inline bool isTRUNCStore(const SDNode *N) {
-  return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
-}
+  /// Returns true if the specified node is a truncating store.
+  inline bool isTRUNCStore(const SDNode *N) {
+    return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
+  }
 
-/// Returns true if the specified node is an unindexed store.
-inline bool isUNINDEXEDStore(const SDNode *N) {
-  return isa<StoreSDNode>(N) &&
-         cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
-}
+  /// Returns true if the specified node is an unindexed store.
+  inline bool isUNINDEXEDStore(const SDNode *N) {
+    return isa<StoreSDNode>(N) &&
+      cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
+  }
 
-/// Attempt to match a unary predicate against a scalar/splat constant or
-/// every element of a constant BUILD_VECTOR.
-/// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
-bool matchUnaryPredicate(SDValue Op,
-                         std::function<bool(ConstantSDNode *)> Match,
-                         bool AllowUndefs = false);
-
-/// Attempt to match a binary predicate against a pair of scalar/splat
-/// constants or every element of a pair of constant BUILD_VECTORs.
-/// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
-/// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
-bool matchBinaryPredicate(
-    SDValue LHS, SDValue RHS,
-    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
-    bool AllowUndefs = false, bool AllowTypeMismatch = false);
-
-/// Returns true if the specified value is the overflow result from one
-/// of the overflow intrinsic nodes.
-inline bool isOverflowIntrOpRes(SDValue Op) {
-  unsigned Opc = Op.getOpcode();
-  return (Op.getResNo() == 1 &&
-          (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-           Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
-}
+  /// Attempt to match a unary predicate against a scalar/splat constant or
+  /// every element of a constant BUILD_VECTOR.
+  /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
+  bool matchUnaryPredicate(SDValue Op,
+                           std::function<bool(ConstantSDNode *)> Match,
+                           bool AllowUndefs = false);
+
+  /// Attempt to match a binary predicate against a pair of scalar/splat
+  /// constants or every element of a pair of constant BUILD_VECTORs.
+  /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
+  /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
+  bool matchBinaryPredicate(
+      SDValue LHS, SDValue RHS,
+      std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+      bool AllowUndefs = false, bool AllowTypeMismatch = false);
+
+  /// Returns true if the specified value is the overflow result from one
+  /// of the overflow intrinsic nodes.
+  inline bool isOverflowIntrOpRes(SDValue Op) {
+    unsigned Opc = Op.getOpcode();
+    return (Op.getResNo() == 1 &&
+            (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+             Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
+  }
 
 } // end namespace ISD
 

From d05f42cd543caec9b707339a9a809f7053766169 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Thu, 8 Jul 2021 01:22:21 +0900
Subject: [PATCH 50/52] [VE] Revert local modifications caused by clang-format

Revert local modifications to the following files that were caused by clang-format.
- clang/lib/CodeGen/CGExprScalar.cpp
- libunwind/src/Registers.hpp
- llvm/include/llvm/Analysis/DivergenceAnalysis.h
- llvm/include/llvm/Analysis/LoopAccessAnalysis.h
- llvm/lib/Analysis/TargetTransformInfo.cpp
- llvm/lib/IR/CMakeLists.txt
---
 clang/lib/CodeGen/CGExprScalar.cpp            |  1 +
 libunwind/src/Registers.hpp                   |  1 -
 .../llvm/Analysis/DivergenceAnalysis.h        |  3 +--
 .../llvm/Analysis/LoopAccessAnalysis.h        | 23 ++++++++++---------
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  7 +++---
 llvm/lib/IR/CMakeLists.txt                    |  1 +
 6 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 43f033e00948..9002163199ab 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2099,6 +2099,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
       return EmitLoadOfLValue(DestLV, CE->getExprLoc());
     }
+
     return Builder.CreateBitCast(Src, DstTy);
   }
   case CK_AddressSpaceConversion: {
diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp
index a4c75ae47bd3..aea84cc22721 100644
--- a/libunwind/src/Registers.hpp
+++ b/libunwind/src/Registers.hpp
@@ -3794,7 +3794,6 @@ class _LIBUNWIND_HIDDEN Registers_riscv {
 # endif
 };
 
-
 inline Registers_riscv::Registers_riscv(const void *registers) {
   static_assert((check_fit<Registers_riscv, unw_context_t>::does_fit),
                 "riscv registers do not fit into unw_context_t");
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index b4d8710a04eb..0b36ef35aa59 100644
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -129,8 +129,7 @@ class DivergenceAnalysisImpl {
   const LoopInfo &LI;
 
   // Recognized divergent loops
-  DenseSet<const Loop *>
-      DivergentLoops; // FIXME Deprecated. For statistics only.
+  DenseSet<const Loop *> DivergentLoops;
 
   // The SDA links divergent branches to divergent control-flow joins.
   SyncDependenceAnalysis &SDA;
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 49420515be8a..39acfd5bbbee 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -269,7 +269,7 @@ class MemoryDepChecker {
   const Loop *InnermostLoop;
 
   /// Maps access locations (ptr, read/write) to program order.
-  DenseMap<MemAccessInfo, std::vector<unsigned>> Accesses;
+  DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
 
   /// Memory access instructions in program order.
   SmallVector<Instruction *, 16> InstMap;
@@ -550,7 +550,7 @@ class LoopAccessInfo {
 
   uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
   unsigned getNumStores() const { return NumStores; }
-  unsigned getNumLoads() const { return NumLoads; }
+  unsigned getNumLoads() const { return NumLoads;}
 
   /// The diagnostics report generated for the analysis.  E.g. why we
   /// couldn't analyze the loop.
@@ -592,8 +592,8 @@ class LoopAccessInfo {
 
 private:
   /// Analyze the loop.
-  void analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
-                   DominatorTree *DT);
+  void analyzeLoop(AAResults *AA, LoopInfo *LI,
+                   const TargetLibraryInfo *TLI, DominatorTree *DT);
 
   /// Check if the structure of the loop allows it to be analyzed by this
   /// pass.
@@ -756,7 +756,8 @@ class LoopAccessLegacyAnalysis : public FunctionPass {
 /// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
 /// getResult return a LoopAccessInfo object.  See this class for the
 /// specifics of what information is provided.
-class LoopAccessAnalysis : public AnalysisInfoMixin<LoopAccessAnalysis> {
+class LoopAccessAnalysis
+    : public AnalysisInfoMixin<LoopAccessAnalysis> {
   friend AnalysisInfoMixin<LoopAccessAnalysis>;
   static AnalysisKey Key;
 
@@ -766,16 +767,16 @@ class LoopAccessAnalysis : public AnalysisInfoMixin<LoopAccessAnalysis> {
   Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
 };
 
-inline Instruction *
-MemoryDepChecker::Dependence::getSource(const LoopAccessInfo &LAI) const {
+inline Instruction *MemoryDepChecker::Dependence::getSource(
+    const LoopAccessInfo &LAI) const {
   return LAI.getDepChecker().getMemoryInstructions()[Source];
 }
 
-inline Instruction *
-MemoryDepChecker::Dependence::getDestination(const LoopAccessInfo &LAI) const {
+inline Instruction *MemoryDepChecker::Dependence::getDestination(
+    const LoopAccessInfo &LAI) const {
   return LAI.getDepChecker().getMemoryInstructions()[Destination];
 }
 
-} // namespace llvm
+} // End llvm namespace
 
-#endif
\ No newline at end of file
+#endif
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b3118b0f2dd6..72e056fa16d7 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -561,9 +561,10 @@ int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
   return Cost;
 }
 
-int TargetTransformInfo::getIntImmCostIntrin(
-    Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty,
-    TTI::TargetCostKind CostKind) const {
+int
+TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+                                         const APInt &Imm, Type *Ty,
+                                         TTI::TargetCostKind CostKind) const {
   int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index 12e731da31ff..4d8f76b7c8fb 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -61,6 +61,7 @@ add_llvm_component_library(LLVMCore
   Value.cpp
   ValueSymbolTable.cpp
   Verifier.cpp
+
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR
 

From 8169b6b2e02bae78a32b84149bbbca407befc39d Mon Sep 17 00:00:00 2001
From: Simon Moll <simon.moll@emea.nec.com>
Date: Thu, 8 Jul 2021 16:13:53 +0200
Subject: [PATCH 51/52] [VP]workaround: Run ExpandVP in opt with legacy pm
 (default in isel)

---
 llvm/test/CodeGen/Generic/expand-vp.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll
index 3962a5185f6f..e6a4f1c21bbc 100644
--- a/llvm/test/CodeGen/Generic/expand-vp.ll
+++ b/llvm/test/CodeGen/Generic/expand-vp.ll
@@ -1,4 +1,4 @@
-; RUN: opt --expand-vec-pred -S < %s | FileCheck %s
+; RUN: opt --enable-new-pm=0 --expand-vec-pred -S < %s | FileCheck %s
 
 define void @test_vp_int(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
 ; CHECK-NOT: {{call.* @llvm.vp.add}}

From c26b4d3d4ed4c13598219871c4e4c282ffc2a84e Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Sat, 10 Jul 2021 07:50:07 +0900
Subject: [PATCH 52/52] [VE] Disable relative lookup table converter pass for
 VE

The NEC linker for VE (/opt/nec/ve/bin/nld) does not support relative lookup
tables, so we need to disable the new pass introduced by
https://reviews.llvm.org/D94355 for VE.
---
 llvm/lib/Target/VE/VETargetTransformInfo.cpp | 10 ++++++++++
 llvm/lib/Target/VE/VETargetTransformInfo.h   |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.cpp b/llvm/lib/Target/VE/VETargetTransformInfo.cpp
index 1dabbfa3cfba..742e3c934b13 100644
--- a/llvm/lib/Target/VE/VETargetTransformInfo.cpp
+++ b/llvm/lib/Target/VE/VETargetTransformInfo.cpp
@@ -120,3 +120,13 @@ void VETTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   UP.Runtime = UP.Partial = true;
 }
 /// } Unrolling
+
+bool VETTIImpl::shouldBuildRelLookupTables() const {
+  // NEC nld doesn't support relative lookup tables.  Linking fails with:
+  //   /opt/nec/ve/bin/nld: src/CMakeFiles/cxxabi_shared.dir/cxa_demangle.cpp.o
+  //   (.rodata+0x17b4): reloc against `.L.str.376': error 2
+  //   /opt/nec/ve/bin/nld: final link failed: Nonrepresentable section on
+  //   output
+  // So always return false to keep relative lookup tables disabled for VE.
+  return false;
+}
diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.h b/llvm/lib/Target/VE/VETargetTransformInfo.h
index ccc66f8327fd..c1a290e7dd5b 100644
--- a/llvm/lib/Target/VE/VETargetTransformInfo.h
+++ b/llvm/lib/Target/VE/VETargetTransformInfo.h
@@ -367,6 +367,8 @@ class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {
     return !isSupportedReduction(II->getIntrinsicID(), Unordered);
   }
 
+  bool shouldBuildRelLookupTables() const;
+
   void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                TargetTransformInfo::UnrollingPreferences &UP);
 };