From dd684dca5216cad6a367bf96ba2bc777ec63e06e Mon Sep 17 00:00:00 2001
From: Cameron Zwarich <zwarich@mozilla.com>
Date: Wed, 25 Jun 2014 20:33:49 -0700
Subject: [PATCH 1/4] Add a NullCheckElimination pass

This pass is not Rust-specific, in that all of its transformations are
intended to be correct for arbitrary LLVM IR, but it targets idioms
found in IR generated by `rustc`, e.g. heavy use of `inbounds` GEPs.
---
 include/llvm/InitializePasses.h               |   3 +
 include/llvm/LinkAllPasses.h                  |   3 +
 include/llvm/Transforms/Scalar.h              |   7 +
 lib/Transforms/Scalar/CMakeLists.txt          |   1 +
 .../Scalar/NullCheckElimination.cpp           | 273 ++++++++++++++++++
 lib/Transforms/Scalar/Scalar.cpp              |   1 +
 test/Transforms/NullCheckElimination/basic.ll | 165 +++++++++++
 7 files changed, 453 insertions(+)
 create mode 100644 lib/Transforms/Scalar/NullCheckElimination.cpp
 create mode 100644 test/Transforms/NullCheckElimination/basic.ll

diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 0c840f39f522..1e7090b4094f 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -275,6 +275,9 @@ void initializeBBVectorizePass(PassRegistry&);
 void initializeMachineFunctionPrinterPassPass(PassRegistry&);
 void initializeStackMapLivenessPass(PassRegistry&);
 void initializeLoadCombinePass(PassRegistry&);
+
+// Specific to the rust-lang llvm branch:
+void initializeNullCheckEliminationPass(PassRegistry&);
 }
 
 #endif
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index b2309ffc2140..45a3256397e6 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -160,6 +160,9 @@ namespace {
       (void) llvm::createScalarizerPass();
       (void) llvm::createSeparateConstOffsetFromGEPPass();
 
+      // Specific to the rust-lang llvm branch:
+      (void) llvm::createNullCheckEliminationPass();
+
       (void)new llvm::IntervalPartition();
       (void)new llvm::FindUsedTypes();
       (void)new llvm::ScalarEvolution();
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 8ecfd801d0d8..4fe70c1d9ce1 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -388,6 +388,13 @@ FunctionPass *createSeparateConstOffsetFromGEPPass();
 //
 BasicBlockPass *createLoadCombinePass();
 
+// Specific to the rust-lang llvm branch:
+//===----------------------------------------------------------------------===//
+//
+// NullCheckElimination - Eliminate null checks.
+//
+FunctionPass *createNullCheckEliminationPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 2dcfa237ca33..08a789f848f7 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMScalarOpts
   LoopUnswitch.cpp
   LowerAtomic.cpp
   MemCpyOptimizer.cpp
+  NullCheckElimination.cpp
   PartiallyInlineLibCalls.cpp
   Reassociate.cpp
   Reg2Mem.cpp
diff --git a/lib/Transforms/Scalar/NullCheckElimination.cpp b/lib/Transforms/Scalar/NullCheckElimination.cpp
new file mode 100644
index 000000000000..1a921ccaaa86
--- /dev/null
+++ b/lib/Transforms/Scalar/NullCheckElimination.cpp
@@ -0,0 +1,273 @@
+//===-- NullCheckElimination.cpp - Null Check Elimination Pass ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "null-check-elimination"
+
+namespace {
+  struct NullCheckElimination : public FunctionPass {
+    static char ID;
+    NullCheckElimination() : FunctionPass(ID) {
+      initializeNullCheckEliminationPass(*PassRegistry::getPassRegistry());
+    }
+    bool runOnFunction(Function &F) override;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+    }
+
+  private:
+    static const unsigned kPhiLimit = 16;
+    typedef SmallPtrSet<PHINode*, kPhiLimit> SmallPhiSet;
+    enum NullCheckResult {
+	NotNullCheck,
+	NullCheckEq,
+	NullCheckNe,
+    };
+
+    bool isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, PHINode*);
+
+    NullCheckResult isCmpNullCheck(ICmpInst*);
+    std::pair<Use*, NullCheckResult> findNullCheck(Use*);
+
+    bool blockContainsLoadDerivedFrom(BasicBlock*, Value*);
+
+    DenseSet<Value*> NonNullOrPoisonValues;
+  };
+}
+
+char NullCheckElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(NullCheckElimination,
+		      "null-check-elimination",
+		      "Null Check Elimination",
+		      false, false)
+INITIALIZE_PASS_END(NullCheckElimination,
+		    "null-check-elimination",
+		    "Null Check Elimination",
+		    false, false)
+
+FunctionPass *llvm::createNullCheckEliminationPass() {
+  return new NullCheckElimination();
+}
+
+bool NullCheckElimination::runOnFunction(Function &F) {
+  if (skipOptnoneFunction(F))
+    return false;
+
+  bool Changed = false;
+
+  // Collect argumetns with the `nonnull` attribute.
+  for (auto &Arg : F.args()) {
+    if (Arg.hasNonNullAttr())
+      NonNullOrPoisonValues.insert(&Arg);
+  }
+
+  // Collect instructions that definitely produce nonnull-or-poison values.
+  // At the moment, this is restricted to inbounds GEPs. It would be slightly
+  // more difficult to include uses of values dominated by a null check, since
+  // then we would have to consider uses instead of mere values.
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+	if (GEP->isInBounds()) {
+	  NonNullOrPoisonValues.insert(GEP);
+	}
+      }
+    }
+  }
+
+  // Find phis that are derived entirely from nonnull-or-poison values,
+  // including other phis that are themselves derived entirely from these
+  // values.
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      auto *PN = dyn_cast<PHINode>(&I);
+      if (!PN)
+	break;
+
+      SmallPhiSet VisitedPHIs;
+      if (isNonNullOrPoisonPhi(&VisitedPHIs, PN))
+	NonNullOrPoisonValues.insert(PN);
+    }
+  }
+
+  for (auto &BB : F) {
+    // This could also be extended to handle SwitchInst, but using a SwitchInst
+    // for a null check seems unlikely.
+    auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
+    if (!BI || BI->isUnconditional())
+      continue;
+
+    // The first operand of a conditional branch is the condition.
+    auto result = findNullCheck(&BI->getOperandUse(0));
+    if (!result.first)
+      continue;
+    assert((result.second == NullCheckEq || result.second == NullCheckNe) &&
+	   "valid null check kind expected if ICmpInst was found");
+
+    BasicBlock *NonNullBB;
+    if (result.second == NullCheckEq) {
+      // If the comparison instruction is checking for equaliity with null,
+      // then the pointer is nonnull on the `false` branch.
+      NonNullBB = BI->getSuccessor(1);
+    } else {
+      // Otherwise, if the comparison instruction is checking for inequality
+      // with null, the pointer is nonnull on the `true` branch.
+      NonNullBB = BI->getSuccessor(0);
+    }
+
+    Use *U = result.first;
+    ICmpInst *CI = cast<ICmpInst>(U->get());
+    unsigned nonConstantIndex;
+    if (isa<Constant>(CI->getOperand(0)))
+      nonConstantIndex = 1;
+    else
+      nonConstantIndex = 0;
+
+    // Due to the semantics of poison values in LLVM, we have to check that
+    // there is actually some externally visible side effect that is dependent
+    // on the poison value. Since poison values are otherwise treated as undef,
+    // and a load of undef is undefined behavior (which is externally visible),
+    // it suffices to look for a load of the nonnull-or-poison value.
+    //
+    // This could be extended to any block control-dependent on this branch of
+    // the null check, it's unclear if that will actually catch more cases in
+    // real code.
+    Value *PtrV = CI->getOperand(nonConstantIndex);
+    if (blockContainsLoadDerivedFrom(NonNullBB, PtrV)) {
+      Type *BoolTy = CI->getType();
+      Value *NewV = ConstantInt::get(BoolTy, result.second == NullCheckNe);
+      U->set(NewV);
+    }
+  }
+
+  NonNullOrPoisonValues.clear();
+
+  return Changed;
+}
+
+/// Checks whether a phi is derived from known nonnnull-or-poison values,
+/// including other phis that are derived from the same. May return `false`
+/// conservatively in some cases, e.g. if exploring a large cycle of phis.
+bool
+NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis,
+					   PHINode *PN) {
+  // If we've already seen this phi, return `true`, even though it may not be
+  // nonnull, since some other operand in a cycle of phis may invalidate the
+  // optimistic assumption that the entire cycle is nonnull, including this phi.
+  if (!VisitedPhis->insert(PN))
+    return true;
+
+  // Use a sensible limit to avoid iterating over long chains of phis that are
+  // unlikely to be nonnull.
+  if (VisitedPhis->size() >= kPhiLimit)
+    return false;
+
+  unsigned numOperands = PN->getNumOperands();
+  for (unsigned i = 0; i < numOperands; ++i) {
+    Value *SrcValue = PN->getOperand(i);
+    if (NonNullOrPoisonValues.count(SrcValue)) {
+      continue;
+    } else if (auto *SrcPN = dyn_cast<PHINode>(SrcValue)) {
+      if (!isNonNullOrPoisonPhi(VisitedPhis, SrcPN))
+	return false;
+    } else {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/// Determines whether an ICmpInst is a null check of a known nonnull-or-poison
+/// value.
+NullCheckElimination::NullCheckResult
+NullCheckElimination::isCmpNullCheck(ICmpInst *CI) {
+  if (!CI->isEquality())
+    return NotNullCheck;
+
+  unsigned constantIndex;
+  if (NonNullOrPoisonValues.count(CI->getOperand(0)))
+    constantIndex = 1;
+  else if (NonNullOrPoisonValues.count(CI->getOperand(1)))
+    constantIndex = 0;
+  else
+    return NotNullCheck;
+
+  auto *C = dyn_cast<Constant>(CI->getOperand(constantIndex));
+  if (!C || !C->isZeroValue())
+    return NotNullCheck;
+
+  return
+    CI->getPredicate() == llvm::CmpInst::ICMP_EQ ? NullCheckEq : NullCheckNe;
+}
+
+/// Finds the Use, if any, of an ICmpInst null check of a nonnull-or-poison
+/// value.
+std::pair<Use*, NullCheckElimination::NullCheckResult>
+NullCheckElimination::findNullCheck(Use *U) {
+  auto *I = dyn_cast<Instruction>(U->get());
+  if (!I)
+    return std::make_pair(nullptr, NotNullCheck);
+
+  if (auto *CI = dyn_cast<ICmpInst>(I)) {
+    NullCheckResult result = isCmpNullCheck(CI);
+    if (result == NotNullCheck)
+      return std::make_pair(nullptr, NotNullCheck);
+    else
+      return std::make_pair(U, result);
+  }
+
+  unsigned opcode = I->getOpcode();
+  if (opcode == Instruction::Or || opcode == Instruction::And) {
+    auto result = findNullCheck(&I->getOperandUse(0));
+    if (result.second == NotNullCheck)
+      return findNullCheck(&I->getOperandUse(1));
+    else
+      return result;
+  }
+
+  return std::make_pair(nullptr, NotNullCheck);
+}
+
+/// Determines whether `BB` contains a load from `PtrV`, or any inbounds GEP
+/// derived from `PtrV`.
+bool
+NullCheckElimination::blockContainsLoadDerivedFrom(BasicBlock *BB,
+						   Value *PtrV) {
+  for (auto &I : *BB) {
+    auto *LI = dyn_cast<LoadInst>(&I);
+    if (!LI)
+      continue;
+
+    Value *V = LI->getPointerOperand();
+    while (NonNullOrPoisonValues.count(V)) {
+      if (V == PtrV)
+	return true;
+
+      auto *GEP = dyn_cast<GetElementPtrInst>(V);
+      if (!GEP)
+	break;
+
+      V = GEP->getOperand(0);
+    }
+  }
+
+  return false;
+}
+
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index edf012d81171..f2aed1ea0f62 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -66,6 +66,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeTailCallElimPass(Registry);
   initializeSeparateConstOffsetFromGEPPass(Registry);
   initializeLoadCombinePass(Registry);
+  initializeNullCheckEliminationPass(Registry);
 }
 
 void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
diff --git a/test/Transforms/NullCheckElimination/basic.ll b/test/Transforms/NullCheckElimination/basic.ll
new file mode 100644
index 000000000000..f176fb32c3fd
--- /dev/null
+++ b/test/Transforms/NullCheckElimination/basic.ll
@@ -0,0 +1,165 @@
+; RUN: opt < %s -null-check-elimination -instsimplify -S | FileCheck %s
+
+define i64 @test_arg_simple(i64* nonnull %p) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %b0 = icmp eq i64* %p0, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_arg_simple
+; CHECK-NOT: , null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_arg_simple_fail(i64* %p) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %b0 = icmp eq i64* %p0, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_arg_simple_fail
+; CHECK: null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_simple(i64* %p) {
+entry:
+  %p0 = getelementptr inbounds i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %b0 = icmp eq i64* %p1, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_simple
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_simple_fail(i64* %p) {
+entry:
+  %p0 = getelementptr i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %b0 = icmp eq i64* %p1, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_simple_fail
+; CHECK: null
+
+match_else:
+  %i0 = load i64* %p1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_or(i64* %p, i64* %q) {
+entry:
+  %p0 = getelementptr inbounds i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %b0 = icmp eq i64* %p1, %q
+  %b1 = icmp eq i64* %p1, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_or
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_and(i64* %p, i64* %q) {
+entry:
+  %p0 = getelementptr inbounds i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %b0 = icmp eq i64* %p1, %q
+  %b1 = icmp eq i64* %p1, null
+  %b2 = and i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_and
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_derived_load(i64* %p) {
+entry:
+  %p0 = getelementptr inbounds i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %b0 = icmp eq i64* %p1, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_derived_load
+; CHECK-NOT: null
+
+match_else:
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %i0 = load i64* %p2
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+

From 0876764d07653e8f51d9b9ba943aab0920caf99d Mon Sep 17 00:00:00 2001
From: Cameron Zwarich <zwarich@mozilla.com>
Date: Thu, 26 Jun 2014 23:41:06 -0700
Subject: [PATCH 2/4] Add the NullCheckElimination pass to the default pass
 list

Since the NullCheckElimination pass has a similar intent to the
CorrelatedValuePropagation pass, I decided to run it right after the
both places that the latter runs.
---
 lib/Transforms/IPO/PassManagerBuilder.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index c20c717de5e7..9503edc72b9b 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -187,6 +187,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createEarlyCSEPass());              // Catch trivial redundancies
   MPM.add(createJumpThreadingPass());         // Thread jumps.
   MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+  // Specific to the rust-lang llvm branch:
+  MPM.add(createNullCheckEliminationPass());  // Eliminate null checks
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createInstructionCombiningPass());  // Combine silly seq's
   addExtensionsToPM(EP_Peephole, MPM);
@@ -218,6 +220,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createJumpThreadingPass());         // Thread jumps
   MPM.add(createCorrelatedValuePropagationPass());
+  // Specific to the rust-lang llvm branch:
+  MPM.add(createNullCheckEliminationPass());  // Eliminate null checks
   MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);

From ab85d02e84edeea59ac38505a62ec7d0536cc726 Mon Sep 17 00:00:00 2001
From: Cameron Zwarich <zwarich@mozilla.com>
Date: Sun, 29 Jun 2014 11:18:05 -0700
Subject: [PATCH 3/4] Improve the NullCheckElimination pass

Teach the NullCheckElimination pass about recurrences involving inbounds
GEPs. This allows the pass to optimize null checks from most uses of
single slice and vector iterators.

This still isn't sufficient to eliminate null checks from uses of
multiple iterators with zip().
---
 .../Scalar/NullCheckElimination.cpp           | 373 ++++++++++++------
 test/Transforms/NullCheckElimination/basic.ll | 120 ++++++
 .../NullCheckElimination/complex.ll           | 316 +++++++++++++++
 3 files changed, 688 insertions(+), 121 deletions(-)
 create mode 100644 test/Transforms/NullCheckElimination/complex.ll

diff --git a/lib/Transforms/Scalar/NullCheckElimination.cpp b/lib/Transforms/Scalar/NullCheckElimination.cpp
index 1a921ccaaa86..7295db7adc47 100644
--- a/lib/Transforms/Scalar/NullCheckElimination.cpp
+++ b/lib/Transforms/Scalar/NullCheckElimination.cpp
@@ -34,78 +34,119 @@ namespace {
   private:
     static const unsigned kPhiLimit = 16;
     typedef SmallPtrSet<PHINode*, kPhiLimit> SmallPhiSet;
-    enum NullCheckResult {
-	NotNullCheck,
-	NullCheckEq,
-	NullCheckNe,
+
+    enum CmpKind {
+      /// A null check of an unconditionally nonnull-or-poison value.
+      NullCheckDefiniteCmp,
+
+      /// A null check of the phi representing a nontrivial inbounds recurrence,
+      /// which is not known to be unconditionally nonnull-or-poison.
+      NullCheckRecurrenceCmp,
+
+      /// A comparison of the phi representing a nontrivial inbounds recurrence
+      /// with an inbounds GEP derived from the base of the recurrence, which
+      /// will typically represent a bound on the recurrence.
+      RecurrencePhiBoundCmp,
+    };
+
+    enum CmpPred {
+      CmpEq,
+      CmpNe,
+    };
+
+    struct CmpDesc {
+      CmpDesc(CmpKind k, CmpPred p, Use *u, Value *v)
+        : kind(k), pred(p), use(u), ptrValue(v) { }
+
+      CmpKind kind;
+      CmpPred pred;
+      Use *use;
+      Value *ptrValue;
     };
 
+    typedef SmallVector<CmpDesc, 4> CmpDescVec;
+
     bool isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, PHINode*);
+    Value *isNontrivialInBoundsRecurrence(PHINode*);
 
-    NullCheckResult isCmpNullCheck(ICmpInst*);
-    std::pair<Use*, NullCheckResult> findNullCheck(Use*);
+    bool classifyCmp(CmpDescVec*, Use*);
+    bool findRelevantCmps(CmpDescVec*, Use*);
 
     bool blockContainsLoadDerivedFrom(BasicBlock*, Value*);
 
+    /// Tracks values that are unconditionally nonnull-or-poison by definition,
+    /// but not values that are known nonnull-or-poison in a given context by
+    /// their uses, e.g. in recurrences.
     DenseSet<Value*> NonNullOrPoisonValues;
+
+    /// Tracks values that are bases of nontrivial inbounds recurrences.
+    DenseSet<Value*> InBoundsRecurrenceBases;
+
+    /// Maps phis that correspond to nontrivial inbounds recurrences to their
+    /// base values.
+    DenseMap<Value*, Value*> InBoundsRecurrenceBaseMap;
   };
 }
 
 char NullCheckElimination::ID = 0;
 INITIALIZE_PASS_BEGIN(NullCheckElimination,
-		      "null-check-elimination",
-		      "Null Check Elimination",
-		      false, false)
+                      "null-check-elimination",
+                      "Null Check Elimination",
+                      false, false)
 INITIALIZE_PASS_END(NullCheckElimination,
-		    "null-check-elimination",
-		    "Null Check Elimination",
-		    false, false)
+                    "null-check-elimination",
+                    "Null Check Elimination",
+                    false, false)
 
 FunctionPass *llvm::createNullCheckEliminationPass() {
   return new NullCheckElimination();
 }
 
+static GetElementPtrInst *castToInBoundsGEP(Value *V) {
+  auto *GEP = dyn_cast<GetElementPtrInst>(V);
+  if (!GEP || !GEP->isInBounds())
+    return nullptr;
+  return GEP;
+}
+
+static bool isZeroConstant(Value *V) {
+  auto *C = dyn_cast<Constant>(V);
+  return C && C->isZeroValue();
+}
+
 bool NullCheckElimination::runOnFunction(Function &F) {
   if (skipOptnoneFunction(F))
     return false;
 
   bool Changed = false;
 
-  // Collect argumetns with the `nonnull` attribute.
+  // Collect arguments with the `nonnull` attribute.
   for (auto &Arg : F.args()) {
     if (Arg.hasNonNullAttr())
       NonNullOrPoisonValues.insert(&Arg);
   }
 
-  // Collect instructions that definitely produce nonnull-or-poison values.
-  // At the moment, this is restricted to inbounds GEPs. It would be slightly
-  // more difficult to include uses of values dominated by a null check, since
-  // then we would have to consider uses instead of mere values.
+  // Collect instructions that definitely produce nonnull-or-poison values. At
+  // the moment, this is restricted to inbounds GEPs, and phis that are derived
+  // entirely from nonnull-or-poison-values (including other phis that are
+  // themselves derived from the same).
   for (auto &BB : F) {
     for (auto &I : BB) {
-      if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
-	if (GEP->isInBounds()) {
-	  NonNullOrPoisonValues.insert(GEP);
-	}
+      if (auto *GEP = castToInBoundsGEP(&I)) {
+        NonNullOrPoisonValues.insert(GEP);
+      } else if (auto *PN = dyn_cast<PHINode>(&I)) {
+        SmallPhiSet VisitedPHIs;
+        if (isNonNullOrPoisonPhi(&VisitedPHIs, PN))
+          NonNullOrPoisonValues.insert(PN);
+
+        if (auto *BaseV = isNontrivialInBoundsRecurrence(PN)) {
+          InBoundsRecurrenceBases.insert(BaseV);
+          InBoundsRecurrenceBaseMap[PN] = BaseV;
+        }
       }
     }
   }
 
-  // Find phis that are derived entirely from nonnull-or-poison values,
-  // including other phis that are themselves derived entirely from these
-  // values.
-  for (auto &BB : F) {
-    for (auto &I : BB) {
-      auto *PN = dyn_cast<PHINode>(&I);
-      if (!PN)
-	break;
-
-      SmallPhiSet VisitedPHIs;
-      if (isNonNullOrPoisonPhi(&VisitedPHIs, PN))
-	NonNullOrPoisonValues.insert(PN);
-    }
-  }
-
   for (auto &BB : F) {
     // This could also be extended to handle SwitchInst, but using a SwitchInst
     // for a null check seems unlikely.
@@ -114,49 +155,75 @@ bool NullCheckElimination::runOnFunction(Function &F) {
       continue;
 
     // The first operand of a conditional branch is the condition.
-    auto result = findNullCheck(&BI->getOperandUse(0));
-    if (!result.first)
+    CmpDescVec Cmps;
+    if (!findRelevantCmps(&Cmps, &BI->getOperandUse(0)))
       continue;
-    assert((result.second == NullCheckEq || result.second == NullCheckNe) &&
-	   "valid null check kind expected if ICmpInst was found");
-
-    BasicBlock *NonNullBB;
-    if (result.second == NullCheckEq) {
-      // If the comparison instruction is checking for equaliity with null,
-      // then the pointer is nonnull on the `false` branch.
-      NonNullBB = BI->getSuccessor(1);
-    } else {
-      // Otherwise, if the comparison instruction is checking for inequality
-      // with null, the pointer is nonnull on the `true` branch.
-      NonNullBB = BI->getSuccessor(0);
-    }
 
-    Use *U = result.first;
-    ICmpInst *CI = cast<ICmpInst>(U->get());
-    unsigned nonConstantIndex;
-    if (isa<Constant>(CI->getOperand(0)))
-      nonConstantIndex = 1;
-    else
-      nonConstantIndex = 0;
-
-    // Due to the semantics of poison values in LLVM, we have to check that
-    // there is actually some externally visible side effect that is dependent
-    // on the poison value. Since poison values are otherwise treated as undef,
-    // and a load of undef is undefined behavior (which is externally visible),
-    // it suffices to look for a load of the nonnull-or-poison value.
-    //
-    // This could be extended to any block control-dependent on this branch of
-    // the null check, it's unclear if that will actually catch more cases in
-    // real code.
-    Value *PtrV = CI->getOperand(nonConstantIndex);
-    if (blockContainsLoadDerivedFrom(NonNullBB, PtrV)) {
-      Type *BoolTy = CI->getType();
-      Value *NewV = ConstantInt::get(BoolTy, result.second == NullCheckNe);
-      U->set(NewV);
+    for (auto &Cmp : Cmps) {
+      // We are only tracking comparisons of inbounds recurrence phis with their
+      // bounds so that we can eliminate null checks based on them, which are of
+      // kind NullCheckRecurrenceCmp. We can't use a lone RecurrencePhiBoundCmp
+      // to perform any optimizations.
+      if (Cmp.kind == RecurrencePhiBoundCmp)
+        continue;
+
+      if (Cmp.kind == NullCheckRecurrenceCmp) {
+        // Look for a matching RecurrencePhiBoundCmp. If one exists, then we can
+        // be sure that this branch condition depends on the recurrence. Since
+        // both the bounds and the recurrence successor value are inbounds, and
+        // they are both derived from the base, the base being null would imply
+        // that the bounds and recurrence successor values are poison.
+        bool FoundMatchingCmp = false;
+        for (auto &OtherCmp : Cmps) {
+          if (OtherCmp.kind == RecurrencePhiBoundCmp &&
+              OtherCmp.ptrValue == Cmp.ptrValue) {
+            FoundMatchingCmp = true;
+            break;
+          }
+        }
+        if (!FoundMatchingCmp)
+          continue;
+      }
+
+      BasicBlock *NonNullBB;
+      if (Cmp.pred == CmpEq) {
+        // If the comparison instruction is checking for equality with null then
+        // the pointer is nonnull on the `false` branch.
+        NonNullBB = BI->getSuccessor(1);
+      } else {
+        // Otherwise, if the comparison instruction is checking for inequality
+        // with null, the pointer is nonnull on the `true` branch.
+        NonNullBB = BI->getSuccessor(0);
+      }
+
+      // This is a crude approximation of control dependence: if the branch
+      // target has a single predecessor edge, then it must be control-
+      // dependent on the branch.
+      if (!NonNullBB->getSinglePredecessor())
+        continue;
+
+      // Due to the semantics of poison values in LLVM, we have to check that
+      // there is actually some externally visible side effect that is dependent
+      // on the poison value. Since poison values are otherwise treated as
+      // undef, and a load of undef is undefined behavior (which is externally
+      // visible), it suffices to look for a load of the nonnull-or-poison
+      // value.
+      //
+      // This could be extended to any block control-dependent on this branch of
+      // the null check, it's unclear if that will actually catch more cases in
+      // real code.
+      if (blockContainsLoadDerivedFrom(NonNullBB, Cmp.ptrValue)) {
+        Type *BoolTy = Type::getInt1Ty(F.getContext());
+        Value *NewV = ConstantInt::get(BoolTy, Cmp.pred == CmpNe);
+        Cmp.use->set(NewV);
+        Changed = true;
+      }
     }
   }
 
   NonNullOrPoisonValues.clear();
+  InBoundsRecurrenceBases.clear();
+  InBoundsRecurrenceBaseMap.clear();
 
   return Changed;
 }
@@ -164,9 +231,12 @@ bool NullCheckElimination::runOnFunction(Function &F) {
 /// Checks whether a phi is derived from known nonnnull-or-poison values,
 /// including other phis that are derived from the same. May return `false`
 /// conservatively in some cases, e.g. if exploring a large cycle of phis.
+///
+/// This function may also insert any inbounds GEPs that it finds into
+/// NonNullOrPoisonValues.
 bool
 NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis,
-					   PHINode *PN) {
+                                           PHINode *PN) {
   // If we've already seen this phi, return `true`, even though it may not be
   // nonnull, since some other operand in a cycle of phis may invalidate the
   // optimistic assumption that the entire cycle is nonnull, including this phi.
@@ -183,9 +253,11 @@ NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis,
     Value *SrcValue = PN->getOperand(i);
     if (NonNullOrPoisonValues.count(SrcValue)) {
       continue;
+    } else if (auto *GEP = castToInBoundsGEP(SrcValue)) {
+      NonNullOrPoisonValues.insert(GEP);
     } else if (auto *SrcPN = dyn_cast<PHINode>(SrcValue)) {
       if (!isNonNullOrPoisonPhi(VisitedPhis, SrcPN))
-	return false;
+        return false;
     } else {
       return false;
     }
@@ -194,75 +266,134 @@ NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis,
   return true;
 }
 
-/// Determines whether an ICmpInst is a null check of a known nonnull-or-poison
-/// value.
-NullCheckElimination::NullCheckResult
-NullCheckElimination::isCmpNullCheck(ICmpInst *CI) {
+/// Determines whether a phi corresponds to an inbounds recurrence where the
+/// base is not a known nonnull-or-poison value. Returns the base value, or
+/// null if the phi doesn't correspond to such a recurrence.
+Value *NullCheckElimination::isNontrivialInBoundsRecurrence(PHINode *PN) {
+  if (PN->getNumOperands() != 2)
+    return nullptr;
+
+  Value *BaseV;
+  GetElementPtrInst *SuccessorI;
+  if (auto *GEP = castToInBoundsGEP(PN->getOperand(0))) {
+    BaseV = PN->getOperand(1);
+    SuccessorI = GEP;
+  } else if (auto *GEP = castToInBoundsGEP(PN->getOperand(1))) {
+    BaseV = PN->getOperand(0);
+    SuccessorI = GEP;
+  } else {
+    return nullptr;
+  }
+
+  if (NonNullOrPoisonValues.count(BaseV) || SuccessorI->getOperand(0) != PN)
+    return nullptr;
+
+  return BaseV;
+}
+
+/// Determines whether an ICmpInst is one of the forms that is relevant to
+/// null check elimination, and then adds a CmpDesc to Cmps when applicable.
+/// The ICmpInst is passed as a Use so this Use can be placed into the CmpDesc,
+/// but the Use parameter must be a Use of an ICmpInst.
+bool NullCheckElimination::classifyCmp(CmpDescVec *Cmps, Use *U) {
+  auto *CI = cast<ICmpInst>(U);
   if (!CI->isEquality())
-    return NotNullCheck;
-
-  unsigned constantIndex;
-  if (NonNullOrPoisonValues.count(CI->getOperand(0)))
-    constantIndex = 1;
-  else if (NonNullOrPoisonValues.count(CI->getOperand(1)))
-    constantIndex = 0;
-  else
-    return NotNullCheck;
-
-  auto *C = dyn_cast<Constant>(CI->getOperand(constantIndex));
-  if (!C || !C->isZeroValue())
-    return NotNullCheck;
-
-  return
-    CI->getPredicate() == llvm::CmpInst::ICMP_EQ ? NullCheckEq : NullCheckNe;
+    return false;
+
+  CmpPred Pred = (CI->getPredicate() == llvm::CmpInst::ICMP_EQ) ? CmpEq : CmpNe;
+  Value *Op0 = CI->getOperand(0);
+  Value *Op1 = CI->getOperand(1);
+
+  if (NonNullOrPoisonValues.count(Op0)) {
+    if (isZeroConstant(Op1)) {
+      Cmps->push_back(CmpDesc(NullCheckDefiniteCmp, Pred, U, Op0));
+      return true;
+    }
+
+    auto it = InBoundsRecurrenceBaseMap.find(Op1);
+    if (it == InBoundsRecurrenceBaseMap.end())
+      return false;
+
+    auto *GEP = castToInBoundsGEP(Op0);
+    if (!GEP)
+      return false;
+
+    auto *BaseV = it->second;
+    if (GEP->getOperand(0) != BaseV)
+      return false;
+
+    Cmps->push_back(CmpDesc(RecurrencePhiBoundCmp, Pred, U, Op1));
+    return true;
+  }
+
+  // Since InstCombine or InstSimplify should have canonicalized a comparison
+  // with `null` to have the `null` in the second operand, we don't need to
+  // handle the case where Op0 is `null` like we did with Op1 above.
+  if (NonNullOrPoisonValues.count(Op1)) {
+    auto it = InBoundsRecurrenceBaseMap.find(Op0);
+    if (it == InBoundsRecurrenceBaseMap.end())
+      return false;
+
+    auto *GEP = castToInBoundsGEP(Op1);
+    if (!GEP)
+      return false;
+
+    auto *BaseV = it->second;
+    if (GEP->getOperand(0) != BaseV)
+      return false;
+
+    Cmps->push_back(CmpDesc(RecurrencePhiBoundCmp, Pred, U, Op0));
+    return true;
+  }
+
+  if (InBoundsRecurrenceBaseMap.count(Op0)) {
+    if (isZeroConstant(Op1)) {
+      Cmps->push_back(CmpDesc(NullCheckRecurrenceCmp, Pred, U, Op0));
+      return true;
+    }
+  }
+
+  return false;
 }
 
-/// Finds the Use, if any, of an ICmpInst null check of a nonnull-or-poison
-/// value.
-std::pair<Use*, NullCheckElimination::NullCheckResult>
-NullCheckElimination::findNullCheck(Use *U) {
+/// Classifies the comparisons that are relevant to null check elimination,
+/// starting from a Use. The CmpDescs of the comparisons are collected in Cmps.
+bool NullCheckElimination::findRelevantCmps(CmpDescVec *Cmps, Use *U) {
   auto *I = dyn_cast<Instruction>(U->get());
   if (!I)
-    return std::make_pair(nullptr, NotNullCheck);
-
-  if (auto *CI = dyn_cast<ICmpInst>(I)) {
-    NullCheckResult result = isCmpNullCheck(CI);
-    if (result == NotNullCheck)
-      return std::make_pair(nullptr, NotNullCheck);
-    else
-      return std::make_pair(U, result);
-  }
+    return false;
 
-  unsigned opcode = I->getOpcode();
-  if (opcode == Instruction::Or || opcode == Instruction::And) {
-    auto result = findNullCheck(&I->getOperandUse(0));
-    if (result.second == NotNullCheck)
-      return findNullCheck(&I->getOperandUse(1));
-    else
-      return result;
+  if (isa<ICmpInst>(I))
+    return classifyCmp(Cmps, U);
+
+  unsigned Opcode = I->getOpcode();
+  if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+    bool FoundCmps = findRelevantCmps(Cmps, &I->getOperandUse(0));
+    FoundCmps |= findRelevantCmps(Cmps, &I->getOperandUse(1));
+    return FoundCmps;
   }
 
-  return std::make_pair(nullptr, NotNullCheck);
+  return false;
 }
 
 /// Determines whether `BB` contains a load from `PtrV`, or any inbounds GEP
 /// derived from `PtrV`.
 bool
 NullCheckElimination::blockContainsLoadDerivedFrom(BasicBlock *BB,
-						   Value *PtrV) {
+                                                   Value *PtrV) {
   for (auto &I : *BB) {
     auto *LI = dyn_cast<LoadInst>(&I);
     if (!LI)
       continue;
 
     Value *V = LI->getPointerOperand();
-    while (NonNullOrPoisonValues.count(V)) {
+    while (1) {
       if (V == PtrV)
-	return true;
+        return true;
 
-      auto *GEP = dyn_cast<GetElementPtrInst>(V);
+      auto *GEP = castToInBoundsGEP(V);
       if (!GEP)
-	break;
+        break;
 
       V = GEP->getOperand(0);
     }
diff --git a/test/Transforms/NullCheckElimination/basic.ll b/test/Transforms/NullCheckElimination/basic.ll
index f176fb32c3fd..8ea5851f627a 100644
--- a/test/Transforms/NullCheckElimination/basic.ll
+++ b/test/Transforms/NullCheckElimination/basic.ll
@@ -22,6 +22,28 @@ return:
   ret i64 0
 }
 
+define i64 @test_arg_simple_ne(i64* nonnull %p) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %b0 = icmp ne i64* %p0, null
+  br i1 %b0, label %match_else, label %return
+
+; CHECK-LABEL: @test_arg_simple
+; CHECK-NOT: , null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
 define i64 @test_arg_simple_fail(i64* %p) {
 entry:
   br label %loop_body
@@ -44,6 +66,31 @@ return:
   ret i64 0
 }
 
+define i64 @test_arg_simple_fail_control_dep(i64* nonnull %p) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  br i1 undef, label %loop_body2, label %match_else
+
+loop_body2:
+  %b0 = icmp eq i64* %p0, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_arg_simple_fail_control_dep
+; CHECK: null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
 define i64 @test_inbounds_simple(i64* %p) {
 entry:
   %p0 = getelementptr inbounds i64* %p, i64 0
@@ -67,6 +114,29 @@ return:
   ret i64 0
 }
 
+define i64 @test_inbounds_simple_ne(i64* %p) {
+entry:
+  %p0 = getelementptr inbounds i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %b0 = icmp ne i64* %p1, null
+  br i1 %b0, label %match_else, label %return
+
+; CHECK-LABEL: @test_inbounds_simple
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
 define i64 @test_inbounds_simple_fail(i64* %p) {
 entry:
   %p0 = getelementptr i64* %p, i64 0
@@ -90,6 +160,32 @@ return:
   ret i64 0
 }
 
+define i64 @test_inbounds_simple_fail_control_dep(i64* %p) {
+entry:
+  %p0 = getelementptr i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  br i1 undef, label %loop_body2, label %match_else
+
+loop_body2:
+  %b0 = icmp eq i64* %p1, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_simple_fail_control_dep
+; CHECK: null
+
+match_else:
+  %i0 = load i64* %p1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
 define i64 @test_inbounds_or(i64* %p, i64* %q) {
 entry:
   %p0 = getelementptr inbounds i64* %p, i64 0
@@ -163,3 +259,27 @@ return:
   ret i64 0
 }
 
+define i64 @test_inbounds_derived_load_fail(i64* %p) {
+entry:
+  %p0 = getelementptr inbounds i64* %p, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %b0 = icmp eq i64* %p1, null
+  br i1 %b0, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_derived_load_fail
+; CHECK: icmp eq i64* %p1, null
+
+match_else:
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %p3 = getelementptr i64* %p1, i64 1
+  %i0 = load i64* %p3
+  %b1 = icmp ugt i64 %i0, 10
+  br i1 %b1, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
diff --git a/test/Transforms/NullCheckElimination/complex.ll b/test/Transforms/NullCheckElimination/complex.ll
new file mode 100644
index 000000000000..3059d02ad8dc
--- /dev/null
+++ b/test/Transforms/NullCheckElimination/complex.ll
@@ -0,0 +1,316 @@
+; RUN: opt < %s -null-check-elimination -instsimplify -S | FileCheck %s
+
+define i64 @test_arg_multiple(i64* nonnull %p, i64* nonnull %q) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %q0 = phi i64* [ %q, %entry ], [ %q1, %match_else ]
+  %b0 = icmp eq i64* %p0, null
+  %b1 = icmp eq i64* %q0, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_arg_multiple
+; CHECK-NOT: , null
+
+match_else:
+  %i0 = load i64* %p0
+  %i1 = load i64* %q0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %q1 = getelementptr inbounds i64* %q0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_arg_multiple_fail(i64* nonnull %p, i64* %q) {
+entry:
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %q0 = phi i64* [ %q, %entry ], [ %q1, %match_else ]
+  %b0 = icmp eq i64* %p0, null
+  %b1 = icmp eq i64* %q0, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_arg_multiple_fail
+; CHECK-NOT: icmp eq i64* %p0, null
+; CHECK: icmp eq i64* %q0, null
+
+match_else:
+  %i0 = load i64* %p0
+  %i1 = load i64* %q0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %q1 = getelementptr inbounds i64* %q0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_multiple(i64* %p, i64* %q) {
+entry:
+  %p0 = getelementptr inbounds i64* %p, i64 0
+  %q0 = getelementptr inbounds i64* %q, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %q1 = phi i64* [ %q0, %entry ], [ %q2, %match_else ]
+  %b0 = icmp eq i64* %p1, null
+  %b1 = icmp eq i64* %q1, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_multiple
+; CHECK-NOT: , null
+
+match_else:
+  %i0 = load i64* %p1
+  %i1 = load i64* %q1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %q2 = getelementptr inbounds i64* %q1, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_multiple_fail(i64* %p, i64* %q) {
+entry:
+  %p0 = getelementptr i64* %p, i64 0
+  %q0 = getelementptr inbounds i64* %q, i64 0
+  br label %loop_body
+
+loop_body:
+  %p1 = phi i64* [ %p0, %entry ], [ %p2, %match_else ]
+  %q1 = phi i64* [ %q0, %entry ], [ %q2, %match_else ]
+  %b0 = icmp eq i64* %p1, null
+  %b1 = icmp eq i64* %q1, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_multiple_fail
+; CHECK: icmp eq i64* %p1, null
+; CHECK-NOT: icmp eq i64* %q1, null
+
+match_else:
+  %i0 = load i64* %p1
+  %i1 = load i64* %q1
+  %p2 = getelementptr inbounds i64* %p1, i64 1
+  %q2 = getelementptr inbounds i64* %q1, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_or(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr inbounds i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %b0 = icmp eq i64* %p0, %q
+  %b1 = icmp eq i64* %p0, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_or
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_or_fail1(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %b0 = icmp eq i64* %p0, %q
+  %b1 = icmp eq i64* %p0, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_or_fail1
+; CHECK: icmp eq i64* %p0, null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_or_fail2(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr inbounds i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %b0 = icmp eq i64* %p0, %q
+  %b1 = icmp eq i64* %p0, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_or_fail2
+; CHECK: icmp eq i64* %p0, null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_and(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr inbounds i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p, %entry ], [ %p1, %match_else ]
+  %b0 = icmp eq i64* %p0, %q
+  %b1 = icmp eq i64* %p0, null
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_and
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_or_rev(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr inbounds i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ]
+  %b0 = icmp eq i64* %p0, null
+  %b1 = icmp eq i64* %q, %p0
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_or_rev
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_or_rev_fail1(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ]
+  %b0 = icmp eq i64* %p0, null
+  %b1 = icmp eq i64* %q, %p0
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_or_rev_fail1
+; CHECK: icmp eq i64* %p0, null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_or_rev_fail2(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr inbounds i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ]
+  %b0 = icmp eq i64* %p0, null
+  %b1 = icmp eq i64* %q, %p0
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_or_rev_fail2
+; CHECK: icmp eq i64* %p0, null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+
+define i64 @test_inbounds_recurrence_and_rev(i64* %p, i64 %len) {
+entry:
+  %q = getelementptr inbounds i64* %p, i64 %len
+  br label %loop_body
+
+loop_body:
+  %p0 = phi i64* [ %p1, %match_else ], [ %p, %entry ]
+  %b0 = icmp eq i64* %p0, null
+  %b1 = icmp eq i64* %q, %p0
+  %b2 = or i1 %b0, %b1
+  br i1 %b2, label %return, label %match_else
+
+; CHECK-LABEL: @test_inbounds_recurrence_and_rev
+; CHECK-NOT: null
+
+match_else:
+  %i0 = load i64* %p0
+  %p1 = getelementptr inbounds i64* %p0, i64 1
+  %b3 = icmp ugt i64 %i0, 10
+  br i1 %b3, label %return, label %loop_body
+
+return:
+  ret i64 0
+}
+

From a37ca05822ea4c99a344f904bed996a131261db8 Mon Sep 17 00:00:00 2001
From: Bill Wendling <isanbard@gmail.com>
Date: Mon, 14 Jul 2014 06:22:36 +0000
Subject: [PATCH 4/4] Support lowering of empty aggregates.

This crash was pretty common while compiling Rust for iOS (armv7). Reason -
SjLj preparation step was lowering aggregate arguments as ExtractValue +
InsertValue. ExtractValue has assertion which checks that there is some data in
value, which is not true in case of empty (no fields) structures. Rust uses
them quite extensively so this patch uses a 'select true, %val, undef'
instruction to lower the argument.

Patch by Valerii Hiora.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212922 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SjLjEHPrepare.cpp                 | 22 ++++++-------
 .../ARM/sjljehprepare-lower-empty-struct.ll   | 31 +++++++++++++++++++
 2 files changed, 42 insertions(+), 11 deletions(-)
 create mode 100644 test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll

diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index d2f395594860..f53408a0f124 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -249,18 +249,18 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
        ++AI) {
     Type *Ty = AI->getType();
 
-    // Aggregate types can't be cast, but are legal argument types, so we have
-    // to handle them differently. We use an extract/insert pair as a
-    // lightweight method to achieve the same goal.
     if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
-      Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
-      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
-      NI->insertAfter(EI);
-      AI->replaceAllUsesWith(NI);
-
-      // Set the operand of the instructions back to the AllocaInst.
-      EI->setOperand(0, AI);
-      NI->setOperand(0, AI);
+      // Aggregate types can't be cast, but are legal argument types,
+      // so we have to handle them differently.  We use
+      // select i8 true, %arg, undef to achieve the same goal
+      Value *TrueValue = ConstantInt::getTrue(F.getContext());
+      Value *UndefValue = UndefValue::get(Ty);
+      Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue,
+                                           AI->getName() + ".tmp",
+                                           AfterAllocaInsPt);
+      AI->replaceAllUsesWith(SI);
+
+      SI->setOperand(1, AI);
     } else {
       // This is always a no-op cast because we're casting AI to AI->getType()
       // so src and destination types are identical. BitCast is the only
diff --git a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
new file mode 100644
index 000000000000..e26f6350050f
--- /dev/null
+++ b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=armv7-apple-ios -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -O1 < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -O2 < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -O3 < %s | FileCheck %s
+
+; SjLjEHPrepare shouldn't crash when lowering empty structs.
+;
+; Checks that between in case of empty structs used as arguments
+; nothing happens, i.e. there are no instructions between
+; __Unwind_SjLj_Register and actual @bar invocation
+
+
+define i8* @foo({} %c) {
+entry:
+; CHECK: bl __Unwind_SjLj_Register
+; CHECK-NEXT: {{[A-Z][a-zA-Z0-9]*}}:
+; CHECK-NEXT: bl _bar
+  invoke void @bar ()
+    to label %unreachable unwind label %handler
+
+unreachable:
+  unreachable
+
+handler:
+  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @baz to i8*)
+  cleanup
+  resume { i8*, i32 } undef
+}
+
+declare void @bar()
+declare i32 @baz(...)