diff --git a/Makefile.rules b/Makefile.rules index 941797107c78..ff0a3e3f8191 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -377,6 +377,7 @@ ifeq ($(ENABLE_COVERAGE),1) BuildMode := $(BuildMode)+Coverage CXX.Flags += -ftest-coverage -fprofile-arcs C.Flags += -ftest-coverage -fprofile-arcs + LD.Flags += -ftest-coverage -fprofile-arcs endif # If DISABLE_ASSERTIONS=1 is specified (make command line or configured), diff --git a/docs/LangRef.rst b/docs/LangRef.rst index cb94d3967b13..c535443e2c57 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -2749,11 +2749,12 @@ number representing the maximum relative error, for example: '``range``' Metadata ^^^^^^^^^^^^^^^^^^^^ -``range`` metadata may be attached only to loads of integer types. It -expresses the possible ranges the loaded value is in. The ranges are -represented with a flattened list of integers. The loaded value is known -to be in the union of the ranges defined by each consecutive pair. Each -pair has the following properties: +``range`` metadata may be attached only to ``load``, ``call`` and ``invoke`` of +integer types. It expresses the possible ranges the loaded value or the value +returned by the called function at this call site is in. The ranges are +represented with a flattened list of integers. The loaded value or the value +returned is known to be in the union of the ranges defined by each consecutive +pair. Each pair has the following properties: - The type must match the type loaded by the instruction. - The pair ``a,b`` represents the range ``[a,b)``. 
@@ -2771,8 +2772,9 @@ Examples: %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1 %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1 - %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5 - %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5 + %c = call i8 @foo(), !range !2 ; Can only be 0, 1, 3, 4 or 5 + %d = invoke i8 @bar() to label %cont + unwind label %lpad, !range !3 ; Can only be -2, -1, 3, 4 or 5 ... !0 = metadata !{ i8 0, i8 2 } !1 = metadata !{ i8 255, i8 2 } @@ -3144,6 +3146,42 @@ Each individual option is required to be either a valid option for the target's linker, or an option that is reserved by the target specific assembly writer or object file emitter. No other aspect of these options is defined by the IR. +C type width Module Flags Metadata +---------------------------------- + +The ARM backend emits a section into each generated object file describing the +options that it was compiled with (in a compiler-independent way) to prevent +linking incompatible objects, and to allow automatic library selection. Some +of these options are not visible at the IR level, namely wchar_t width and enum +width. + +To pass this information to the backend, these options are encoded in module +flags metadata, using the following key-value pairs: + +.. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Key + - Value + + * - short_wchar + - * 0 --- sizeof(wchar_t) == 4 + * 1 --- sizeof(wchar_t) == 2 + + * - short_enum + - * 0 --- Enums are at least as large as an ``int``. + * 1 --- Enums are stored in the smallest integer type which can + represent all of its values. 
+ +For example, the following metadata section specifies that the module was +compiled with a ``wchar_t`` width of 4 bytes, and the underlying type of an +enum is the smallest type which can represent all of its values:: + + !llvm.module.flags = !{!0, !1} + !0 = metadata !{i32 1, metadata !"short_wchar", i32 1} + !1 = metadata !{i32 1, metadata !"short_enum", i32 0} + .. _intrinsicglobalvariables: Intrinsic Global Variables diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 4973e5c66719..a7b28b36ca1d 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -2170,46 +2170,13 @@ compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and using the resultant compiler to build a copy of LLVM with multithreading support. -.. _startmultithreaded: - -Entering and Exiting Multithreaded Mode ---------------------------------------- - -In order to properly protect its internal data structures while avoiding -excessive locking overhead in the single-threaded case, the LLVM must intialize -certain data structures necessary to provide guards around its internals. To do -so, the client program must invoke ``llvm_start_multithreaded()`` before making -any concurrent LLVM API calls. To subsequently tear down these structures, use -the ``llvm_stop_multithreaded()`` call. You can also use the -``llvm_is_multithreaded()`` call to check the status of multithreaded mode. - -Note that both of these calls must be made *in isolation*. That is to say that -no other LLVM API calls may be executing at any time during the execution of -``llvm_start_multithreaded()`` or ``llvm_stop_multithreaded``. It is the -client's responsibility to enforce this isolation. - -The return value of ``llvm_start_multithreaded()`` indicates the success or -failure of the initialization. 
Failure typically indicates that your copy of -LLVM was built without multithreading support, typically because GCC atomic -intrinsics were not found in your system compiler. In this case, the LLVM API -will not be safe for concurrent calls. However, it *will* be safe for hosting -threaded applications in the JIT, though :ref:`care must be taken -` to ensure that side exits and the like do not accidentally -result in concurrent LLVM API calls. - .. _shutdown: Ending Execution with ``llvm_shutdown()`` ----------------------------------------- When you are done using the LLVM APIs, you should call ``llvm_shutdown()`` to -deallocate memory used for internal structures. This will also invoke -``llvm_stop_multithreaded()`` if LLVM is operating in multithreaded mode. As -such, ``llvm_shutdown()`` requires the same isolation guarantees as -``llvm_stop_multithreaded()``. - -Note that, if you use scope-based shutdown, you can use the -``llvm_shutdown_obj`` class, which calls ``llvm_shutdown()`` in its destructor. +deallocate memory used for internal structures. .. _managedstatic: @@ -2217,20 +2184,11 @@ Lazy Initialization with ``ManagedStatic`` ------------------------------------------ ``ManagedStatic`` is a utility class in LLVM used to implement static -initialization of static resources, such as the global type tables. Before the -invocation of ``llvm_shutdown()``, it implements a simple lazy initialization -scheme. Once ``llvm_start_multithreaded()`` returns, however, it uses +initialization of static resources, such as the global type tables. In a +single-threaded environment, it implements a simple lazy initialization scheme. +When LLVM is compiled with support for multi-threading, however, it uses double-checked locking to implement thread-safe lazy initialization. -Note that, because no other threads are allowed to issue LLVM API calls before -``llvm_start_multithreaded()`` returns, it is possible to have -``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s. 
- -The ``llvm_acquire_global_lock()`` and ``llvm_release_global_lock`` APIs provide -access to the global lock used to implement the double-checked locking for lazy -initialization. These should only be used internally to LLVM, and only if you -know what you're doing! - .. _llvmcontext: Achieving Isolation with ``LLVMContext`` diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 0e78ed71fa9a..8693a3020aba 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -2848,16 +2848,13 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM); * @{ */ -/** Allocate and initialize structures needed to make LLVM safe for - multithreading. The return value indicates whether multithreaded - initialization succeeded. Must be executed in isolation from all - other LLVM api calls. - @see llvm::llvm_start_multithreaded */ +/** Deprecated: Multi-threading can only be enabled/disabled with the compile + time define LLVM_ENABLE_THREADS. This function always returns + LLVMIsMultithreaded(). */ LLVMBool LLVMStartMultithreaded(void); -/** Deallocate structures necessary to make LLVM safe for multithreading. - Must be executed in isolation from all other LLVM api calls. - @see llvm::llvm_stop_multithreaded */ +/** Deprecated: Multi-threading can only be enabled/disabled with the compile + time define LLVM_ENABLE_THREADS. */ void LLVMStopMultithreaded(void); /** Check whether LLVM is executing in thread-safe mode or not. diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h index 053defff5234..ee34e9b53088 100644 --- a/include/llvm/ADT/APSInt.h +++ b/include/llvm/ADT/APSInt.h @@ -56,7 +56,7 @@ class APSInt : public APInt { APInt::toString(Str, Radix, isSigned()); } /// toString - Converts an APInt to a std::string. This is an inefficient - /// method, your should prefer passing in a SmallString instead. + /// method; you should prefer passing in a SmallString instead. 
std::string toString(unsigned Radix) const { return APInt::toString(Radix, isSigned()); } diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h index 7fd1570cbf12..8f8b7ba2d7ba 100644 --- a/include/llvm/ADT/MapVector.h +++ b/include/llvm/ADT/MapVector.h @@ -123,6 +123,15 @@ class MapVector { Map.erase(Pos); Vector.pop_back(); } + + /// \brief Remove the element given by Iterator. + /// Returns an iterator to the element following the one which was removed, + /// which may be end(). + typename VectorType::iterator erase(typename VectorType::iterator Iterator) { + typename MapType::iterator MapIterator = Map.find(Iterator->first); + Map.erase(MapIterator); + return Vector.erase(Iterator); + } }; } diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h deleted file mode 100644 index 5e83358fc071..000000000000 --- a/include/llvm/ADT/OwningPtr.h +++ /dev/null @@ -1,165 +0,0 @@ -//===- llvm/ADT/OwningPtr.h - Smart ptr that owns the pointee ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines and implements the OwningPtr class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_OWNINGPTR_H -#define LLVM_ADT_OWNINGPTR_H - -#include "llvm/Support/Compiler.h" -#include -#include -#include - -namespace llvm { - -/// OwningPtr smart pointer - OwningPtr mimics a built-in pointer except that it -/// guarantees deletion of the object pointed to, either on destruction of the -/// OwningPtr or via an explicit reset(). Once created, ownership of the -/// pointee object can be taken away from OwningPtr by using the take method. 
-template -class OwningPtr { - OwningPtr(OwningPtr const &) LLVM_DELETED_FUNCTION; - OwningPtr &operator=(OwningPtr const &) LLVM_DELETED_FUNCTION; - T *Ptr; -public: - explicit OwningPtr(T *P = 0) : Ptr(P) {} - - OwningPtr(OwningPtr &&Other) : Ptr(Other.take()) {} - - OwningPtr &operator=(OwningPtr &&Other) { - reset(Other.take()); - return *this; - } - - OwningPtr(std::unique_ptr Other) : Ptr(Other.release()) {} - - OwningPtr &operator=(std::unique_ptr Other) { - reset(Other.release()); - return *this; - } - -#if LLVM_HAS_RVALUE_REFERENCE_THIS - operator std::unique_ptr() && { return std::unique_ptr(take()); } -#endif - - ~OwningPtr() { - delete Ptr; - } - - /// reset - Change the current pointee to the specified pointer. Note that - /// calling this with any pointer (including a null pointer) deletes the - /// current pointer. - void reset(T *P = 0) { - if (P == Ptr) return; - T *Tmp = Ptr; - Ptr = P; - delete Tmp; - } - - /// take - Reset the owning pointer to null and return its pointer. This does - /// not delete the pointer before returning it. - T *take() { - T *Tmp = Ptr; - Ptr = nullptr; - return Tmp; - } - - T *release() { return take(); } - - std::unique_ptr take_unique() { return std::unique_ptr(take()); } - - T &operator*() const { - assert(Ptr && "Cannot dereference null pointer"); - return *Ptr; - } - - T *operator->() const { return Ptr; } - T *get() const { return Ptr; } - LLVM_EXPLICIT operator bool() const { return Ptr != nullptr; } - bool operator!() const { return Ptr == nullptr; } - bool isValid() const { return Ptr != nullptr; } - - void swap(OwningPtr &RHS) { - T *Tmp = RHS.Ptr; - RHS.Ptr = Ptr; - Ptr = Tmp; - } -}; - -template -inline void swap(OwningPtr &a, OwningPtr &b) { - a.swap(b); -} - -/// OwningArrayPtr smart pointer - OwningArrayPtr provides the same -/// functionality as OwningPtr, except that it works for array types. 
-template -class OwningArrayPtr { - OwningArrayPtr(OwningArrayPtr const &) LLVM_DELETED_FUNCTION; - OwningArrayPtr &operator=(OwningArrayPtr const &) LLVM_DELETED_FUNCTION; - T *Ptr; -public: - explicit OwningArrayPtr(T *P = 0) : Ptr(P) {} - - OwningArrayPtr(OwningArrayPtr &&Other) : Ptr(Other.take()) {} - - OwningArrayPtr &operator=(OwningArrayPtr &&Other) { - reset(Other.take()); - return *this; - } - - ~OwningArrayPtr() { - delete [] Ptr; - } - - /// reset - Change the current pointee to the specified pointer. Note that - /// calling this with any pointer (including a null pointer) deletes the - /// current pointer. - void reset(T *P = 0) { - if (P == Ptr) return; - T *Tmp = Ptr; - Ptr = P; - delete [] Tmp; - } - - /// take - Reset the owning pointer to null and return its pointer. This does - /// not delete the pointer before returning it. - T *take() { - T *Tmp = Ptr; - Ptr = 0; - return Tmp; - } - - T &operator[](std::ptrdiff_t i) const { - assert(Ptr && "Cannot dereference null pointer"); - return Ptr[i]; - } - - T *get() const { return Ptr; } - LLVM_EXPLICIT operator bool() const { return Ptr != 0; } - bool operator!() const { return Ptr == nullptr; } - - void swap(OwningArrayPtr &RHS) { - T *Tmp = RHS.Ptr; - RHS.Ptr = Ptr; - Ptr = Tmp; - } -}; - -template -inline void swap(OwningArrayPtr &a, OwningArrayPtr &b) { - a.swap(b); -} - -} // end namespace llvm - -#endif diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index bd72d3ed6d4d..264aff372581 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -22,6 +22,7 @@ #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ScaledNumber.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -326,29 +327,24 @@ template class UnsignedFloat : UnsignedFloatBase { return countLeadingZeros32(Digits) + Width - 
32; } - static UnsignedFloat adjustToWidth(uint64_t N, int32_t S) { - assert(S >= MinExponent); - assert(S <= MaxExponent); - if (Width == 64 || N <= DigitsLimits::max()) - return UnsignedFloat(N, S); - - // Shift right. - int Shift = 64 - Width - countLeadingZeros64(N); - DigitsType Shifted = N >> Shift; - - // Round. - assert(S + Shift <= MaxExponent); - return getRounded(UnsignedFloat(Shifted, S + Shift), - N & UINT64_C(1) << (Shift - 1)); + /// \brief Adjust a number to width, rounding up if necessary. + /// + /// Should only be called for \c Shift close to zero. + /// + /// \pre Shift >= MinExponent && Shift + 64 <= MaxExponent. + static UnsignedFloat adjustToWidth(uint64_t N, int32_t Shift) { + assert(Shift >= MinExponent && "Shift should be close to 0"); + assert(Shift <= MaxExponent - 64 && "Shift should be close to 0"); + auto Adjusted = ScaledNumbers::getAdjusted(N, Shift); + return Adjusted; } static UnsignedFloat getRounded(UnsignedFloat P, bool Round) { - if (!Round) + // Saturate. + if (P.isLargest()) return P; - if (P.Digits == DigitsLimits::max()) - // Careful of overflow in the exponent. - return UnsignedFloat(1, P.Exponent) <<= Width; - return UnsignedFloat(P.Digits + 1, P.Exponent); + + return ScaledNumbers::getRounded(P.Digits, P.Exponent, Round); } }; diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 79fe1dcae639..f57f3eb009a1 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -322,6 +322,7 @@ class TargetTransformInfo { enum ShuffleKind { SK_Broadcast, ///< Broadcast element 0 to all other elements. SK_Reverse, ///< Reverse the order of the vector. + SK_Alternate, ///< Choose alternate elements from vector. SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset. 
}; diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index ce7896738d8c..83b5408fb1c2 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -37,7 +37,10 @@ namespace llvm { /// for all of the elements in the vector. void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout *TD = nullptr, unsigned Depth = 0); - void computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero); + /// Compute known bits from the range metadata. + /// \p KnownZero the set of bits that are known to be zero + void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, + APInt &KnownZero); /// ComputeSignBit - Determine whether the sign bit is known to be zero or /// one. Convenience wrapper around computeKnownBits. diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index 4c194a638d4a..0d0d6a71ab14 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -30,7 +30,8 @@ namespace llvm { /// deserialization of function bodies. If successful, this takes ownership /// of 'buffer. On error, this *does not* take ownership of Buffer. ErrorOr getLazyBitcodeModule(MemoryBuffer *Buffer, - LLVMContext &Context); + LLVMContext &Context, + bool BufferOwned = true); /// getStreamedBitcodeModule - Read the header of the specified stream /// and prepare for lazy deserialization and streaming of function bodies. 
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index e5dab6191ab6..d349af810650 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -22,10 +22,10 @@ #include "llvm/IR/ValueMap.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include +#include #include #include @@ -42,7 +42,6 @@ class JITEventListener; class JITMemoryManager; class MachineCodeInfo; class Module; -class MutexGuard; class ObjectCache; class RTDyldMemoryManager; class Triple; @@ -59,7 +58,7 @@ class ExecutionEngineState { public: struct AddressMapConfig : public ValueMapConfig { typedef ExecutionEngineState *ExtraData; - static sys::Mutex *getMutex(ExecutionEngineState *EES); + static std::recursive_mutex *getMutex(ExecutionEngineState *EES); static void onDelete(ExecutionEngineState *EES, const GlobalValue *Old); static void onRAUW(ExecutionEngineState *, const GlobalValue *, const GlobalValue *); @@ -164,7 +163,7 @@ class ExecutionEngine { /// lock - This lock protects the ExecutionEngine, MCJIT, JIT, JITResolver and /// JITEmitter classes. It must be held while changing the internal state of /// any of those classes. - sys::Mutex lock; + std::recursive_mutex lock; //===--------------------------------------------------------------------===// // ExecutionEngine Startup diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 39c7c37dafd5..257bb80f2119 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -71,6 +71,9 @@ class Constant : public User { /// isThreadDependent - Return true if the value can vary between threads. bool isThreadDependent() const; + /// Return true if the value is dependent on a dllimport variable. 
+ bool isDLLImportDependent() const; + /// isConstantUsed - Return true if the constant has users other than constant /// exprs and other dangling things. bool isConstantUsed() const; diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td index ecb5668d8e95..09607d5834d6 100644 --- a/include/llvm/IR/IntrinsicsR600.td +++ b/include/llvm/IR/IntrinsicsR600.td @@ -33,4 +33,40 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tgid">; defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tidig">; + } // End TargetPrefix = "r600" + +let TargetPrefix = "AMDGPU" in { +def int_AMDGPU_div_scale : + Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_scale">; + +def int_AMDGPU_div_fmas : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_fmas">; + +def int_AMDGPU_div_fixup : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_div_fixup">; + +def int_AMDGPU_trig_preop : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_trig_preop">; + +def int_AMDGPU_rcp : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_rcp">; + +def int_AMDGPU_rsq : + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>], [IntrNoMem]>, + GCCBuiltin<"__builtin_amdgpu_rsq">; + + +} // End TargetPrefix = "AMDGPU" diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index f196f334b605..17b0fe01b116 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -28,9 +28,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/type_traits.h" #include +#include 
namespace llvm { @@ -45,7 +45,7 @@ class ValueMapConstIterator; /// This class defines the default behavior for configurable aspects of /// ValueMap<>. User Configs should inherit from this class to be as compatible /// as possible with future versions of ValueMap. -template +template struct ValueMapConfig { typedef MutexT mutex_type; @@ -216,11 +216,11 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->acquire(); + M->lock(); Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this. Copy.Map->Map.erase(Copy); // Definitely destroys *this. if (M) - M->release(); + M->unlock(); } void allUsesReplacedWith(Value *new_key) override { assert(isa(new_key) && @@ -229,7 +229,7 @@ class ValueMapCallbackVH : public CallbackVH { ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); if (M) - M->acquire(); + M->lock(); KeyT typed_new_key = cast(new_key); // Can destroy *this: @@ -245,7 +245,7 @@ class ValueMapCallbackVH : public CallbackVH { } } if (M) - M->release(); + M->unlock(); } }; diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h new file mode 100644 index 000000000000..2819b757b8bf --- /dev/null +++ b/include/llvm/MC/ConstantPools.h @@ -0,0 +1,80 @@ +//===- ConstantPool.h - Keep track of assembler-generated ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ConstantPool and AssemblerConstantPools classes. 
+// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_MC_CONSTANTPOOL_H +#define LLVM_MC_CONSTANTPOOL_H + +#include "llvm/ADT/SmallVector.h" +namespace llvm { +class MCContext; +class MCExpr; +class MCSection; +class MCStreamer; +class MCSymbol; +// A class to keep track of assembler-generated constant pools that are used to +// implement the ldr-pseudo. +class ConstantPool { + typedef SmallVector, 4> EntryVecTy; + EntryVecTy Entries; + +public: + // Initialize a new empty constant pool + ConstantPool() {} + + // Add a new entry to the constant pool in the next slot. + // \param Value is the new entry to put in the constant pool. + // + // \returns a MCExpr that references the newly inserted value + const MCExpr *addEntry(const MCExpr *Value, MCContext &Context); + + // Emit the contents of the constant pool using the provided streamer. + void emitEntries(MCStreamer &Streamer); + + // Return true if the constant pool is empty + bool empty(); +}; + +class AssemblerConstantPools { + // Map type used to keep track of per-Section constant pools used by the + // ldr-pseudo opcode. The map associates a section to its constant pool. The + // constant pool is a vector of (label, value) pairs. When the ldr + // pseudo is parsed we insert a new (label, value) pair into the constant pool + // for the current section and add MCSymbolRefExpr to the new label as + // an opcode to the ldr. After we have parsed all the user input we + // output the (label, value) pairs in each constant pool at the end of the + // section. + // + // We use the MapVector for the map type to ensure stable iteration of + // the sections at the end of the parse. We need to iterate over the + // sections in a stable order to ensure that we print the + // constant pools in a deterministic order when printing an assembly + // file.
+ typedef MapVector ConstantPoolMapTy; + ConstantPoolMapTy ConstantPools; + +public: + AssemblerConstantPools() {} + ~AssemblerConstantPools() {} + + void emitAll(MCStreamer &Streamer); + void emitForCurrentSection(MCStreamer &Streamer); + const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr); + +private: + ConstantPool *getConstantPool(const MCSection *Section); + ConstantPool &getOrCreateConstantPool(const MCSection *Section); +}; +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 2f9b32b984ec..ce3a99bb3fe1 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -11,10 +11,12 @@ #define LLVM_MC_MCCONTEXT_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -129,11 +131,10 @@ namespace llvm { /// assembly source files. unsigned GenDwarfFileNumber; - /// The default initial text section that we generate dwarf debugging line - /// info for when generating dwarf assembly source files. - const MCSection *GenDwarfSection; - /// Symbols created for the start and end of this section. - MCSymbol *GenDwarfSectionStartSym, *GenDwarfSectionEndSym; + /// Symbols created for the start and end of each section, used for + /// generating the .debug_ranges and .debug_aranges sections. + MapVector > + SectionStartEndSyms; /// The information gathered from labels that will have dwarf label /// entries when generating dwarf assembly source files. 
@@ -374,16 +375,18 @@ namespace llvm { void setGenDwarfFileNumber(unsigned FileNumber) { GenDwarfFileNumber = FileNumber; } - const MCSection *getGenDwarfSection() { return GenDwarfSection; } - void setGenDwarfSection(const MCSection *Sec) { GenDwarfSection = Sec; } - MCSymbol *getGenDwarfSectionStartSym() { return GenDwarfSectionStartSym; } - void setGenDwarfSectionStartSym(MCSymbol *Sym) { - GenDwarfSectionStartSym = Sym; + MapVector > & + getGenDwarfSectionSyms() { + return SectionStartEndSyms; } - MCSymbol *getGenDwarfSectionEndSym() { return GenDwarfSectionEndSym; } - void setGenDwarfSectionEndSym(MCSymbol *Sym) { - GenDwarfSectionEndSym = Sym; + std::pair >::iterator, + bool> + addGenDwarfSection(const MCSection *Sec) { + return SectionStartEndSyms.insert( + std::make_pair(Sec, std::make_pair(nullptr, nullptr))); } + void finalizeDwarfSections(MCStreamer &MCOS); const std::vector &getMCGenDwarfLabelEntries() const { return MCGenDwarfLabelEntries; } diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index e41a8ba63725..a8ba23ac00f3 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -126,6 +126,10 @@ class MCObjectStreamer : public MCStreamer { void EmitFill(uint64_t NumBytes, uint8_t FillValue) override; void EmitZeros(uint64_t NumBytes) override; void FinishImpl() override; + + virtual bool mayHaveInstructions() const { + return getCurrentSectionData()->hasInstructions(); + } }; } // end namespace llvm diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 2a8367afeb8f..1c6039bf6d4d 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -722,6 +722,8 @@ class MCStreamer { virtual void FinishImpl() = 0; /// Finish - Finish emission of machine code. 
void Finish(); + + virtual bool mayHaveInstructions() const { return true; } }; /// createNullStreamer - Create a dummy machine code streamer, which does diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index 80cc8befb7a7..eb4348ed3ec2 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -29,6 +29,7 @@ class MCTargetOptions { bool ShowMCEncoding : 1; bool ShowMCInst : 1; bool AsmVerbose : 1; + int DwarfVersion; MCTargetOptions(); }; @@ -41,7 +42,8 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) { ARE_EQUAL(MCUseDwarfDirectory) && ARE_EQUAL(ShowMCEncoding) && ARE_EQUAL(ShowMCInst) && - ARE_EQUAL(AsmVerbose)); + ARE_EQUAL(AsmVerbose) && + ARE_EQUAL(DwarfVersion)); #undef ARE_EQUAL } diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index 17a117a2a3bd..344983dec5f2 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -33,11 +33,15 @@ cl::opt RelaxAll("mc-relax-all", cl::desc("When used with filetype=obj, " "relax all fixups in the emitted object file")); +cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), + cl::init(0)); + static inline MCTargetOptions InitMCTargetOptionsFromFlags() { MCTargetOptions Options; Options.SanitizeAddress = (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress); Options.MCRelaxAll = RelaxAll; + Options.DwarfVersion = DwarfVersion; return Options; } diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h index 47e93c26b46a..74448f973b2e 100644 --- a/include/llvm/Object/MachOUniversal.h +++ b/include/llvm/Object/MachOUniversal.h @@ -18,6 +18,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/MachO.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MachO.h" @@ -52,6 +53,9 @@ class 
MachOUniversalBinary : public Binary { ObjectForArch getNext() const { return ObjectForArch(Parent, Index + 1); } uint32_t getCPUType() const { return Header.cputype; } + std::string getArchTypeName() const { + return Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + } std::error_code getAsObjectFile(std::unique_ptr &Result) const; diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h index a3aaf17f1d66..5ca245057a55 100644 --- a/include/llvm/Object/RelocVisitor.h +++ b/include/llvm/Object/RelocVisitor.h @@ -253,12 +253,14 @@ class RelocVisitor { /// PPC64 ELF RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) { - int64_t Addend = getAddend64BE(R); + int64_t Addend; + getELFRelocationAddend(R, Addend); uint32_t Res = (Value + Addend) & 0xFFFFFFFF; return RelocToApply(Res, 4); } RelocToApply visitELF_PPC64_ADDR64(RelocationRef R, uint64_t Value) { - int64_t Addend = getAddend64BE(R); + int64_t Addend; + getELFRelocationAddend(R, Addend); return RelocToApply(Value + Addend, 8); } diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h index 113373694556..40015ec9f6db 100644 --- a/include/llvm/Object/SymbolicFile.h +++ b/include/llvm/Object/SymbolicFile.h @@ -136,6 +136,10 @@ class SymbolicFile : public Binary { basic_symbol_iterator symbol_end() const { return symbol_end_impl(); } + typedef iterator_range basic_symbol_iterator_range; + basic_symbol_iterator_range symbols() const { + return basic_symbol_iterator_range(symbol_begin(), symbol_end()); + } // construction aux. 
static ErrorOr createIRObjectFile(MemoryBuffer *Object, diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index ab40a1a0d40a..af8cae15fc2d 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -328,6 +328,7 @@ class InputArgList : public ArgList { unsigned MakeIndex(StringRef String0) const; unsigned MakeIndex(StringRef String0, StringRef String1) const; + using ArgList::MakeArgString; const char *MakeArgString(StringRef Str) const override; /// @} @@ -365,6 +366,7 @@ class DerivedArgList : public ArgList { /// (to be freed). void AddSynthesizedArg(Arg *A); + using ArgList::MakeArgString; const char *MakeArgString(StringRef Str) const override; /// AddFlagArg - Construct a new FlagArg for the given option \p Id and diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index c2b9f95956e8..22d57edb74e4 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -29,6 +29,7 @@ #ifndef LLVM_PASS_H #define LLVM_PASS_H +#include "llvm/PassRegistry.h" #include "llvm/Support/Compiler.h" #include @@ -82,13 +83,14 @@ enum PassKind { class Pass { AnalysisResolver *Resolver; // Used to resolve analysis const void *PassID; + mutable const PassInfo *PI; PassKind Kind; void operator=(const Pass&) LLVM_DELETED_FUNCTION; Pass(const Pass &) LLVM_DELETED_FUNCTION; public: explicit Pass(PassKind K, char &pid) - : Resolver(nullptr), PassID(&pid), Kind(K) { } + : Resolver(nullptr), PassID(&pid), PI(nullptr), Kind(K) { } virtual ~Pass(); @@ -105,6 +107,13 @@ class Pass { return PassID; } + /// getPassInfo - Return the PassInfo associated with this pass. + const PassInfo *getPassInfo() const { + if (!PI) + PI = PassRegistry::getPassRegistry()->getPassInfo(PassID); + return PI; + } + /// doInitialization - Virtual method overridden by subclasses to do /// any necessary initialization before any pass is run. 
/// diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index c5005a1d11ed..7a5a71dc6a31 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -168,8 +168,8 @@ class RawInstrProfReader : public InstrProfReader { const char *NamesStart; const char *ProfileEnd; - RawInstrProfReader(const TextInstrProfReader &) LLVM_DELETED_FUNCTION; - RawInstrProfReader &operator=(const TextInstrProfReader &) + RawInstrProfReader(const RawInstrProfReader &) LLVM_DELETED_FUNCTION; + RawInstrProfReader &operator=(const RawInstrProfReader &) LLVM_DELETED_FUNCTION; public: RawInstrProfReader(std::unique_ptr DataBuffer) diff --git a/include/llvm/Support/ARMBuildAttributes.h b/include/llvm/Support/ARMBuildAttributes.h index 16312004c871..f63e0a61f639 100644 --- a/include/llvm/Support/ARMBuildAttributes.h +++ b/include/llvm/Support/ARMBuildAttributes.h @@ -159,6 +159,11 @@ enum { AddressDirect = 1, // Address imported data directly AddressGOT = 2, // Address imported data indirectly (via GOT) + // Tag_ABI_PCS_wchar_t, (=18), uleb128 + WCharProhibited = 0, // wchar_t is not used + WCharWidth2Bytes = 2, // sizeof(wchar_t) == 2 + WCharWidth4Bytes = 4, // sizeof(wchar_t) == 4 + // Tag_ABI_FP_denormal, (=20), uleb128 PreserveFPSign = 2, // sign when flushed-to-zero is preserved @@ -166,6 +171,16 @@ enum { AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI]) AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings + // Tag_ABI_enum_size, (=26), uleb128 + EnumProhibited = 0, // The user prohibited the use of enums when building + // this entity. + EnumSmallest = 1, // Enum is smallest container big enough to hold all + // values. + Enum32Bit = 2, // Enum is at least 32 bits. + Enum32BitABI = 3, // Every enumeration visible across an ABI-complying + // interface contains a value needing 32 bits to encode + // it; other enums can be containerized. 
+ // Tag_ABI_HardFP_use, (=27), uleb128 HardFPImplied = 0, // FP use should be implied by Tag_FP_arch HardFPSinglePrecision = 1, // Single-precision only diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index ca316441ea7e..cd9f75600cbc 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -57,7 +57,6 @@ enum LLVMConstants : uint32_t { DW_TAG_user_base = 0x1000, // Recommended base for user tags. DWARF_VERSION = 4, // Default dwarf version we output. - DW_CIE_VERSION = 1, // Common frame information version. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames. DW_ARANGES_VERSION = 2 // Section version number for .debug_aranges. diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index 0b3e55b91524..31b34ffa703e 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -124,6 +124,8 @@ enum { }; // Machine architectures +// See current registered ELF machine architectures at: +// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html enum { EM_NONE = 0, // No machine EM_M32 = 1, // AT&T WE 32100 @@ -287,7 +289,26 @@ enum { EM_RL78 = 197, // Renesas RL78 family EM_VIDEOCORE5 = 198, // Broadcom VideoCore V processor EM_78KOR = 199, // Renesas 78KOR family - EM_56800EX = 200 // Freescale 56800EX Digital Signal Controller (DSC) + EM_56800EX = 200, // Freescale 56800EX Digital Signal Controller (DSC) + EM_BA1 = 201, // Beyond BA1 CPU architecture + EM_BA2 = 202, // Beyond BA2 CPU architecture + EM_XCORE = 203, // XMOS xCORE processor family + EM_MCHP_PIC = 204, // Microchip 8-bit PIC(r) family + EM_INTEL205 = 205, // Reserved by Intel + EM_INTEL206 = 206, // Reserved by Intel + EM_INTEL207 = 207, // Reserved by Intel + EM_INTEL208 = 208, // Reserved by Intel + EM_INTEL209 = 209, // Reserved by Intel + EM_KM32 = 210, // KM211 KM32 32-bit processor + EM_KMX32 = 211, // KM211 KMX32 32-bit 
processor + EM_KMX16 = 212, // KM211 KMX16 16-bit processor + EM_KMX8 = 213, // KM211 KMX8 8-bit processor + EM_KVARC = 214, // KM211 KVARC processor + EM_CDP = 215, // Paneve CDP architecture family + EM_COGE = 216, // Cognitive Smart Memory Processor + EM_COOL = 217, // iCelero CoolEngine + EM_NORC = 218, // Nanoradio Optimized RISC + EM_CSR_KALIMBA = 219 // CSR Kalimba architecture family }; // Object file classes. diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 6abe90446b47..e56e2b718a75 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -335,6 +335,12 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting = true); /// @param to The path to rename to. This is created. std::error_code rename(const Twine &from, const Twine &to); +/// @brief Copy the contents of \a From to \a To. +/// +/// @param From The path to copy from. +/// @param To The path to copy to. This is created. +std::error_code copy_file(const Twine &From, const Twine &To); + /// @brief Resize path to size. File is resized as if by POSIX truncate(). /// /// @param path Input path. diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h index 1bb8cea092f9..d8fbfeb8e20c 100644 --- a/include/llvm/Support/ManagedStatic.h +++ b/include/llvm/Support/ManagedStatic.h @@ -103,9 +103,6 @@ void llvm_shutdown(); /// llvm_shutdown() when it is destroyed. 
struct llvm_shutdown_obj { llvm_shutdown_obj() { } - explicit llvm_shutdown_obj(bool multithreaded) { - if (multithreaded) llvm_start_multithreaded(); - } ~llvm_shutdown_obj() { llvm_shutdown(); } }; diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h new file mode 100644 index 000000000000..f6594ec6aa55 --- /dev/null +++ b/include/llvm/Support/ScaledNumber.h @@ -0,0 +1,104 @@ +//===- llvm/Support/ScaledNumber.h - Support for scaled numbers -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions (and a class) useful for working with scaled +// numbers -- in particular, pairs of integers where one represents digits and +// another represents a scale. The functions are helpers and live in the +// namespace ScaledNumbers. The class ScaledNumber is useful for modelling +// certain cost metrics that need simple, integer-like semantics that are easy +// to reason about. +// +// These might remind you of soft-floats. If you want one of those, you're in +// the wrong place. Look at include/llvm/ADT/APFloat.h instead. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_SCALEDNUMBER_H +#define LLVM_SUPPORT_SCALEDNUMBER_H + +#include "llvm/Support/MathExtras.h" + +#include +#include +#include + +namespace llvm { +namespace ScaledNumbers { + +/// \brief Get the width of a number. +template inline int getWidth() { return sizeof(DigitsT) * 8; } + +/// \brief Conditionally round up a scaled number. +/// +/// Given \c Digits and \c Scale, round up iff \c ShouldRound is \c true. +/// Always returns \c Scale unless there's an overflow, in which case it +/// returns \c 1+Scale. +/// +/// \pre adding 1 to \c Scale will not overflow INT16_MAX. 
+template +inline std::pair getRounded(DigitsT Digits, int16_t Scale, + bool ShouldRound) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + if (ShouldRound) + if (!++Digits) + // Overflow. + return std::make_pair(DigitsT(1) << (getWidth() - 1), Scale + 1); + return std::make_pair(Digits, Scale); +} + +/// \brief Convenience helper for 32-bit rounding. +inline std::pair getRounded32(uint32_t Digits, int16_t Scale, + bool ShouldRound) { + return getRounded(Digits, Scale, ShouldRound); +} + +/// \brief Convenience helper for 64-bit rounding. +inline std::pair getRounded64(uint64_t Digits, int16_t Scale, + bool ShouldRound) { + return getRounded(Digits, Scale, ShouldRound); +} + +/// \brief Adjust a 64-bit scaled number down to the appropriate width. +/// +/// Adjust a soft float with 64-bits of digits down, keeping as much +/// information as possible, and rounding up on half. +/// +/// \pre Adding 1 to \c Scale will not overflow INT16_MAX. +template +inline std::pair getAdjusted(uint64_t Digits, + int16_t Scale = 0) { + static_assert(!std::numeric_limits::is_signed, "expected unsigned"); + + const int Width = getWidth(); + if (Width == 64 || Digits <= std::numeric_limits::max()) + return std::make_pair(Digits, Scale); + + // Shift right and round. + int Shift = 64 - Width - countLeadingZeros(Digits); + return getRounded(Digits >> Shift, Scale + Shift, + Digits & (UINT64_C(1) << (Shift - 1))); +} + +/// \brief Convenience helper for adjusting to 32 bits. +inline std::pair getAdjusted32(uint64_t Digits, + int16_t Scale = 0) { + return getAdjusted(Digits, Scale); +} + +/// \brief Convenience helper for adjusting to 64 bits. 
+inline std::pair getAdjusted64(uint64_t Digits, + int16_t Scale = 0) { + return getAdjusted(Digits, Scale); +} +} +} + +#endif + diff --git a/include/llvm/Support/StreamableMemoryObject.h b/include/llvm/Support/StreamableMemoryObject.h index 9c9e55c0a75a..6e71ad47c8dd 100644 --- a/include/llvm/Support/StreamableMemoryObject.h +++ b/include/llvm/Support/StreamableMemoryObject.h @@ -13,6 +13,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/DataStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryObject.h" #include #include @@ -115,7 +116,7 @@ class StreamingMemoryObject : public StreamableMemoryObject { // requiring that the bitcode size be known, or otherwise ensuring that // the memory doesn't go away/get reallocated, but it's // not currently necessary. Users that need the pointer don't stream. - assert(0 && "getPointer in streaming memory objects not allowed"); + llvm_unreachable("getPointer in streaming memory objects not allowed"); return nullptr; } bool isValidAddress(uint64_t address) const override; @@ -154,8 +155,8 @@ class StreamingMemoryObject : public StreamableMemoryObject { kChunkSize); BytesRead += bytes; if (bytes < kChunkSize) { - if (ObjectSize && BytesRead < Pos) - assert(0 && "Unexpected short read fetching bitcode"); + assert((!ObjectSize || BytesRead >= Pos) && + "Unexpected short read fetching bitcode"); if (BytesRead <= Pos) { // reached EOF/ran out of bytes ObjectSize = BytesRead; EOFReached = true; diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 7e1394cb2335..3e0465340c3b 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -29,6 +29,7 @@ #ifndef LLVM_SUPPORT_STRINGPOOL_H #define LLVM_SUPPORT_STRINGPOOL_H +#include "llvm/Support/Compiler.h" #include "llvm/ADT/StringMap.h" #include #include @@ -128,10 +129,10 @@ namespace llvm { } inline const char *operator*() const { return begin(); } - inline operator bool() const { 
return S != nullptr; } + inline LLVM_EXPLICIT operator bool() const { return S != nullptr; } - inline bool operator==(const PooledStringPtr &That) { return S == That.S; } - inline bool operator!=(const PooledStringPtr &That) { return S != That.S; } + inline bool operator==(const PooledStringPtr &That) const { return S == That.S; } + inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } }; } // End llvm namespace diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h index a7e8774558d5..7e8758407c7c 100644 --- a/include/llvm/Support/Threading.h +++ b/include/llvm/Support/Threading.h @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TThis file defines llvm_start_multithreaded() and friends. +// This file declares helper functions for running LLVM in a multi-threaded +// environment. // //===----------------------------------------------------------------------===// @@ -15,32 +16,10 @@ #define LLVM_SUPPORT_THREADING_H namespace llvm { - /// llvm_start_multithreaded - Allocate and initialize structures needed to - /// make LLVM safe for multithreading. The return value indicates whether - /// multithreaded initialization succeeded. LLVM will still be operational - /// on "failed" return, and will still be safe for hosting threading - /// applications in the JIT, but will not be safe for concurrent calls to the - /// LLVM APIs. - /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS. - bool llvm_start_multithreaded(); - - /// llvm_stop_multithreaded - Deallocate structures necessary to make LLVM - /// safe for multithreading. - /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS. - void llvm_stop_multithreaded(); - - /// llvm_is_multithreaded - Check whether LLVM is executing in thread-safe - /// mode or not. + /// Returns true if LLVM is compiled with support for multi-threading, and + /// false otherwise. 
bool llvm_is_multithreaded(); - /// acquire_global_lock - Acquire the global lock. This is a no-op if called - /// before llvm_start_multithreaded(). - void llvm_acquire_global_lock(); - - /// release_global_lock - Release the global lock. This is a no-op if called - /// before llvm_start_multithreaded(). - void llvm_release_global_lock(); - /// llvm_execute_on_thread - Execute the given \p UserFn on a separate /// thread, passing it the provided \p UserData. /// diff --git a/utils/TableGen/SetTheory.h b/include/llvm/TableGen/SetTheory.h similarity index 100% rename from utils/TableGen/SetTheory.h rename to include/llvm/TableGen/SetTheory.h diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index bb164288b013..83f1997bf205 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -73,6 +73,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// MISchedPostRA, is set. virtual bool enablePostMachineScheduler() const; + /// \brief True if the subtarget should run the atomic expansion pass. + virtual bool enableAtomicExpandLoadLinked() const; + /// \brief Override generic scheduling policy within a region. /// /// This is a convenient way for targets that don't provide any custom diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 3684fda854fc..8aa6e5a190bf 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1625,6 +1625,38 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; + // (A & C)|(B & D) + Value *C = nullptr, *D = nullptr; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + ConstantInt *C1 = dyn_cast(C); + ConstantInt *C2 = dyn_cast(D); + if (C1 && C2 && (C1->getValue() == ~C2->getValue())) { + // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. 
and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + Value *V1, *V2; + if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return A; + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return A; + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue() + 1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return B; + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return B; + } + } + } + // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) @@ -2241,6 +2273,25 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If a bit is known to be zero for A and known to be one for B, + // then A and B cannot be equal. + if (ICmpInst::isEquality(Pred)) { + if (ConstantInt *CI = dyn_cast(RHS)) { + uint32_t BitWidth = CI->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + if (((LHSKnownOne & RHSKnownZero) != 0) || + ((LHSKnownZero & RHSKnownOne) != 0)) + return (Pred == ICmpInst::ICMP_EQ) + ? ConstantInt::getFalse(CI->getContext()) + : ConstantInt::getTrue(CI->getContext()); + } + } + // Special logic for binary operators. 
BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 4f4875357828..9de945ed7016 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -188,7 +188,8 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, KnownOne.setBit(BitWidth - 1); } -void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) { +void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, + APInt &KnownZero) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); @@ -338,7 +339,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsLoad(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -733,6 +734,12 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Call: + case Instruction::Invoke: + if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) + computeKnownBitsFromRangeMetadata(*MD, KnownZero); + // If a range metadata is attached to this IntrinsicInst, intersect the + // explicit range specified by the metadata and the implicit range of + // the intrinsic. if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; @@ -742,16 +749,16 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If this call is undefined for 0, the result will be less than 2^n. 
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::ctpop: { unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero = APInt::getHighBitsSet(64, 32); + KnownZero |= APInt::getHighBitsSet(64, 32); break; } } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index c13eba78c2ad..9c398277d42d 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3374,16 +3374,16 @@ const std::error_category &BitcodeReader::BitcodeErrorCategory() { /// getLazyBitcodeModule - lazy function-at-a-time loading from a file. /// ErrorOr llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, - LLVMContext &Context) { + LLVMContext &Context, + bool BufferOwned) { Module *M = new Module(Buffer->getBufferIdentifier(), Context); - BitcodeReader *R = new BitcodeReader(Buffer, Context); + BitcodeReader *R = new BitcodeReader(Buffer, Context, BufferOwned); M->setMaterializer(R); if (std::error_code EC = R->ParseBitcodeInto(M)) { + R->releaseBuffer(); // Never take ownership on error. delete M; // Also deletes R. return EC; } - // Have the BitcodeReader dtor delete 'Buffer'. - R->setBufferOwned(true); R->materializeForwardReferencedFunctions(); @@ -3404,21 +3404,16 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, delete M; // Also deletes R. 
return nullptr; } - R->setBufferOwned(false); // no buffer to delete return M; } ErrorOr llvm::parseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context) { - ErrorOr ModuleOrErr = getLazyBitcodeModule(Buffer, Context); + ErrorOr ModuleOrErr = getLazyBitcodeModule(Buffer, Context, false); if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); - // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether - // there was an error. - static_cast(M->getMaterializer())->setBufferOwned(false); - // Read in the entire module, and destroy the BitcodeReader. if (std::error_code EC = M->materializeAllPermanently()) { delete M; @@ -3434,9 +3429,7 @@ ErrorOr llvm::parseBitcodeFile(MemoryBuffer *Buffer, std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer, LLVMContext& Context, std::string *ErrMsg) { - BitcodeReader *R = new BitcodeReader(Buffer, Context); - // Don't let the BitcodeReader dtor delete 'Buffer'. - R->setBufferOwned(false); + BitcodeReader *R = new BitcodeReader(Buffer, Context, /*BufferOwned*/ false); std::string Triple(""); if (std::error_code EC = R->ParseTriple(Triple)) diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 36849d404541..6aa3e0e5adf7 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -223,29 +223,25 @@ class BitcodeReader : public GVMaterializer { return std::error_code(E, BitcodeErrorCategory()); } - explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) - : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false), - LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), - ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false), UseRelativeIDs(false) { - } + explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, bool BufferOwned) + : Context(C), TheModule(nullptr), Buffer(buffer), + BufferOwned(BufferOwned), LazyStreamer(nullptr), NextUnreadBit(0), + SeenValueSymbolTable(false), 
ValueList(C), MDValueList(C), + SeenFirstFunctionBody(false), UseRelativeIDs(false) {} explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) - : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false), - LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), - ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false), UseRelativeIDs(false) { - } - ~BitcodeReader() { - FreeState(); - } + : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false), + LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), + ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), + UseRelativeIDs(false) {} + ~BitcodeReader() { FreeState(); } void materializeForwardReferencedFunctions(); void FreeState(); - /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer - /// when the reader is destroyed. - void setBufferOwned(bool Owned) { BufferOwned = Owned; } + void releaseBuffer() { + Buffer = nullptr; + } bool isMaterializable(const GlobalValue *GV) const override; bool isDematerializable(const GlobalValue *GV) const override; diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index f31e1fa9b173..72451ec9500c 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -97,7 +97,7 @@ void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: case BitCodeAbbrevOp::Blob: - assert(0 && "Should not reach here"); + llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: Vals.push_back(Read((unsigned)Op.getEncodingData())); break; @@ -117,7 +117,7 @@ void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: case BitCodeAbbrevOp::Blob: - assert(0 && "Should not reach here"); + llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: 
(void)Read((unsigned)Op.getEncodingData()); break; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 996dc2122f49..d30588e5d4f1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -47,7 +47,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index a88aebd2d22f..f78ca2c03b4e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); -static cl::opt -DwarfVersionNumber("dwarf-version", cl::Hidden, - cl::desc("Generate DWARF for dwarf version."), cl::init(0)); - static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; @@ -209,9 +205,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); + Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); @@ -1039,9 +1038,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); + emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - emitDebugLocDWO(); } else // Emit info into a debug loc section. 
emitDebugLoc(); diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp index 4c4150bfec90..c7cc4bcb1cb6 100644 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -21,17 +21,19 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "arm-atomic-expand" namespace { class AtomicExpandLoadLinked : public FunctionPass { - const TargetLowering *TLI; + const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) { + : FunctionPass(ID), TM(TM) { initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); } @@ -59,7 +61,7 @@ FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { } bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TLI) + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) return false; SmallVector AtomicInsts; @@ -76,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) { bool MadeChange = false; for (Instruction *Inst : AtomicInsts) { - if (!TLI->shouldExpandAtomicInIR(Inst)) + if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) continue; if (AtomicRMWInst *AI = dyn_cast(Inst)) @@ -98,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { // Load instructions don't actually need a leading fence, even in the // SequentiallyConsistent case. AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); + TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic + : LI->getOrdering(); // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is // an ldrexd (A3.5.3). 
IRBuilder<> Builder(LI); - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder); + Value *Val = TM->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); insertTrailingFence(Builder, LI->getOrdering()); @@ -165,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *NewVal; switch (AI->getOperation()) { @@ -211,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { llvm_unreachable("Unknown atomic op"); } - Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, NewVal, Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); @@ -278,7 +282,8 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Start the main loop block now that we've taken care of the preliminaries. 
Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *ShouldStore = Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); @@ -287,7 +292,7 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); @@ -352,7 +357,7 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return Ord; if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) @@ -365,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return; if (Ord == Acquire || Ord == AcquireRelease) diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 7f31b1a982fc..b2737bf754f9 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// are set if the result needs to be inserted and/or extracted from vectors. 
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + /// Estimate the cost overhead of SK_Alternate shuffle. + unsigned getAltShuffleOverhead(Type *Ty) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } public: @@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return OpCost; } +unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Shuffle cost is equal to the cost of extracting element from its argument + // plus the cost of inserting them onto the result vector. + + // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index + // 0 of first vector, index 1 of second vector,index 2 of first vector and + // finally index 3 of second vector and insert them at index <0,1,2,3> of + // result vector. + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + return Cost; +} + unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { + if (Kind == SK_Alternate) { + return getAltShuffleOverhead(Tp); + } return 1; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 48eb86c49c39..a4245a6016c2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3152,6 +3152,65 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::FP_TO_SINT: { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + + // FIXME: Only f32 to i64 conversions are supported. 
+ if (VT != MVT::f32 || NVT != MVT::i64) + break; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, TLI.getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, TLI.getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), + 
DAG.getConstant(0, NVT), Ret, ISD::SETLT)); + break; + } case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 639eb462e7ff..5923f0e208e1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2084,7 +2084,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsLoad(*Ranges, KnownZero); + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); } break; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 136baf56e8a7..e6dc27219787 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5439,6 +5439,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { + const TargetLowering *TLI = TM.getTargetLowering(); PointerType *PT = cast(CS.getCalledValue()->getType()); FunctionType *FTy = cast(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5449,45 +5450,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - // Check whether the function can return without sret-demotion. 
- SmallVector Outs; - const TargetLowering *TLI = TM.getTargetLowering(); - GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - - bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); - - SDValue DemoteStackSlot; - int DemoteStackIdx = -100; - - if (!CanLowerReturn) { - assert(!CS.hasInAllocaArgument() && - "sret demotion is incompatible with inalloca"); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( - FTy->getReturnType()); - MachineFunction &MF = DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); - } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5540,46 +5502,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) { + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - } else if (!CanLowerReturn && Result.second.getNode()) { - // The instruction result is the result of loading from the - // hidden sret parameter. 
- SmallVector PVTs; - Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - - ComputeValueVTs(*TLI, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; - - SmallVector RetTys; - SmallVector Offsets; - RetTy = FTy->getReturnType(); - ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); - - unsigned NumValues = RetTys.size(); - SmallVector Values(NumValues); - SmallVector Chains(NumValues); - - for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, - DemoteStackSlot, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, false, 1); - Values[i] = L; - Chains[i] = L.getValue(1); - } - - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, Chains); - PendingLoads.push_back(Chain); - - setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(RetTys), Values)); - } if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -7121,6 +7045,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. 
/// FIXME: When all targets are @@ -7129,24 +7068,62 @@ std::pair TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); + SmallVector Offsets; + ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + + SmallVector Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? 
+ // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); + unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.isReturned = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + } else { + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } } } @@ -7289,31 +7266,59 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { "LowerCall emitted a value with the wrong type!"); }); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. 
- ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (CLI.RetSExt) - AssertOp = ISD::AssertSext; - else if (CLI.RetZExt) - AssertOp = ISD::AssertZext; SmallVector ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - - ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, nullptr, - AssertOp)); - CurReg += NumRegs; - } - - // For a function returning void, there is no return value. We can't create - // such a node, so we just return a null return value in that case. In - // that case, nothing will actually look at the value. - if (ReturnValues.empty()) - return std::make_pair(SDValue(), CLI.Chain); + if (!CanLowerReturn) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector PVTs; + Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); + + ComputeValueVTs(*this, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + unsigned NumValues = RetTys.size(); + ReturnValues.resize(NumValues); + SmallVector Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, + CLI.DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = CLI.DAG.getLoad( + RetTys[i], CLI.DL, CLI.Chain, Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, + false, false, 1); + ReturnValues[i] = L; + Chains[i] = L.getValue(1); + } + + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); + } else { + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. 
+ ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (CLI.RetSExt) + AssertOp = ISD::AssertSext; + else if (CLI.RetZExt) + AssertOp = ISD::AssertZext; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], + NumRegs, RegisterVT, VT, nullptr, + AssertOp)); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value. + if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, CLI.DAG.getVTList(RetTys), ReturnValues); diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 9154fe2f5ff4..553ceb4575d5 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -166,7 +166,7 @@ void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) { } void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); @@ -184,14 +184,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { } void ExecutionEngine::clearAllGlobalMappings() { - MutexGuard locked(lock); + std::lock_guard locked(lock); EEState.getGlobalAddressMap().clear(); EEState.getGlobalAddressReverseMap().clear(); } void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) EEState.RemoveMapping(FI); @@ -201,7 +201,7 @@ 
void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { } void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = EEState.getGlobalAddressMap(); @@ -228,7 +228,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { } void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy::iterator I = EEState.getGlobalAddressMap().find(GV); @@ -236,7 +236,7 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { @@ -555,7 +555,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { if (Function *F = const_cast(dyn_cast(GV))) return getPointerToFunction(F); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (void *P = EEState.getGlobalAddressMap()[GV]) return P; @@ -1346,7 +1346,7 @@ ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE) : EE(EE), GlobalAddressMap(this) { } -sys::Mutex * +std::recursive_mutex * ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) { return &EES->EE.lock; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 83ec9784b98e..463fa299b3f3 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -96,18 +96,18 @@ namespace { /// bugpoint or gdb users to search for a function by name without any context. class JitPool { SmallPtrSet JITs; // Optimize for process containing just 1 JIT. 
- mutable sys::Mutex Lock; + mutable std::recursive_mutex Lock; public: void Add(JIT *jit) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); JITs.insert(jit); } void Remove(JIT *jit) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); JITs.erase(jit); } void *getPointerToNamedFunction(const char *Name) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); assert(JITs.size() != 0 && "No Jit registered"); //search function in every instance of JIT for (SmallPtrSet::const_iterator Jit = JITs.begin(), @@ -150,7 +150,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, AllJits->Add(this); // Add target data - MutexGuard locked(lock); + std::lock_guard locked(lock); FunctionPassManager &PM = jitstate->getPM(); M->setDataLayout(TM.getDataLayout()); PM.add(new DataLayoutPass(M)); @@ -177,7 +177,7 @@ JIT::~JIT() { /// addModule - Add a new Module to the JIT. If we previously removed the last /// Module, we need re-initialize jitstate with a valid Module. void JIT::addModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (Modules.empty()) { assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); @@ -206,7 +206,7 @@ void JIT::addModule(Module *M) { bool JIT::removeModule(Module *M) { bool result = ExecutionEngine::removeModule(M); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (jitstate && jitstate->getModule() == M) { delete jitstate; @@ -408,13 +408,13 @@ GenericValue JIT::runFunction(Function *F, void JIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void JIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); std::vector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -426,14 +426,14 @@ void JIT::NotifyFunctionEmitted( 
const Function &F, void *Code, size_t Size, const JITEvent_EmittedFunctionDetails &Details) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details); } } void JIT::NotifyFreeingMachineCode(void *OldPtr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingMachineCode(OldPtr); } @@ -444,7 +444,7 @@ void JIT::NotifyFreeingMachineCode(void *OldPtr) { /// GlobalAddress[F] with the address of F's machine code. /// void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { - MutexGuard locked(lock); + std::lock_guard locked(lock); class MCIListener : public JITEventListener { MachineCodeInfo *const MCI; @@ -505,7 +505,7 @@ void *JIT::getPointerToFunction(Function *F) { if (void *Addr = getPointerToGlobalIfAvailable(F)) return Addr; // Check if function already code gen'd - MutexGuard locked(lock); + std::lock_guard locked(lock); // Now that this thread owns the lock, make sure we read in the function if it // exists in this Module. 
@@ -534,7 +534,7 @@ void *JIT::getPointerToFunction(Function *F) { } void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -546,7 +546,7 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { } void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { - MutexGuard locked(lock); + std::lock_guard locked(lock); getBasicBlockAddressMap().erase(BB); } @@ -555,7 +555,7 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { (void)getPointerToFunction(BB->getParent()); // resolve basic block address - MutexGuard locked(lock); + std::lock_guard locked(lock); BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap().find(BB); @@ -592,7 +592,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { - MutexGuard locked(lock); + std::lock_guard locked(lock); void *Ptr = getPointerToGlobalIfAvailable(GV); if (Ptr) return Ptr; @@ -666,7 +666,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { size_t S = getDataLayout()->getTypeAllocSize(GlobalType); size_t A = getDataLayout()->getPreferredAlignment(GV); if (GV->isThreadLocal()) { - MutexGuard locked(lock); + std::lock_guard locked(lock); Ptr = TJI.allocateThreadLocalMemory(S); } else if (TJI.allocateSeparateGVMemory()) { if (A <= 8) { @@ -687,7 +687,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { } void JIT::addPendingFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); jitstate->getPendingFunctions().push_back(F); } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index b6f90e2de05a..1e1f5d55802c 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -40,7 +40,6 @@ 
#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Memory.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" @@ -50,6 +49,7 @@ #ifndef NDEBUG #include #endif +#include using namespace llvm; #define DEBUG_TYPE "jit" @@ -121,21 +121,16 @@ namespace { #endif } - FunctionToLazyStubMapTy& getFunctionToLazyStubMap( - const MutexGuard& locked) { - assert(locked.holds(TheJIT->lock)); + FunctionToLazyStubMapTy& getFunctionToLazyStubMap() { return FunctionToLazyStubMap; } - GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) { - assert(lck.holds(TheJIT->lock)); + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() { return GlobalToIndirectSymMap; } std::pair LookupFunctionFromCallSite( - const MutexGuard &locked, void *CallSite) const { - assert(locked.holds(TheJIT->lock)); - + void *CallSite) const { // The address given to us for the stub may not be exactly right, it // might be a little bit after the stub. As such, use upper_bound to // find it. @@ -147,9 +142,7 @@ namespace { return *I; } - void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) { - assert(locked.holds(TheJIT->lock)); - + void AddCallSite(void *CallSite, Function *F) { bool Inserted = CallSiteToFunctionMap.insert( std::make_pair(CallSite, F)).second; (void)Inserted; @@ -237,22 +230,22 @@ namespace { std::map Map; /// Guards Map from concurrent accesses. - mutable sys::Mutex Lock; + mutable std::recursive_mutex Lock; public: /// Registers a Stub to be resolved by Resolver. void RegisterStubResolver(void *Stub, JITResolver *Resolver) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); Map.insert(std::make_pair(Stub, Resolver)); } /// Unregisters the Stub when it's invalidated. 
void UnregisterStubResolver(void *Stub) { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); Map.erase(Stub); } /// Returns the JITResolver instance that owns the Stub. JITResolver *getResolverFromStub(void *Stub) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. // This is the same trick as in LookupFunctionFromCallSite from @@ -265,7 +258,7 @@ namespace { /// True if any stubs refer to the given resolver. Only used in an assert(). /// O(N) bool ResolverHasStubs(JITResolver* Resolver) const { - MutexGuard guard(Lock); + std::lock_guard guard(Lock); for (std::map::const_iterator I = Map.begin(), E = Map.end(); I != E; ++I) { if (I->second == Resolver) @@ -501,19 +494,19 @@ JITResolver::~JITResolver() { /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub /// if it has already been created. void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a stub for this function, recycle it. - return state.getFunctionToLazyStubMap(locked).lookup(F); + return state.getFunctionToLazyStubMap().lookup(F); } /// getFunctionStub - This returns a pointer to a function stub, creating /// one on demand as needed. void *JITResolver::getLazyFunctionStub(Function *F) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a lazy stub for this function, recycle it. - void *&Stub = state.getFunctionToLazyStubMap(locked)[F]; + void *&Stub = state.getFunctionToLazyStubMap()[F]; if (Stub) return Stub; // Call the lazy resolver function if we are JIT'ing lazily. 
Otherwise we @@ -555,7 +548,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { // Finally, keep track of the stub-to-Function mapping so that the // JITCompilerFn knows which function to compile! - state.AddCallSite(locked, Stub, F); + state.AddCallSite(Stub, F); } else if (!Actual) { // If we are JIT'ing non-lazily but need to call a function that does not // exist yet, add it to the JIT's work list so that we can fill in the @@ -571,10 +564,10 @@ void *JITResolver::getLazyFunctionStub(Function *F) { /// getGlobalValueIndirectSym - Return a lazy pointer containing the specified /// GV address. void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { - MutexGuard locked(TheJIT->lock); + std::lock_guard guard(TheJIT->lock); // If we already have a stub for this global variable, recycle it. - void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV]; + void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; if (IndirectSym) return IndirectSym; // Otherwise, codegen a new indirect symbol. @@ -629,12 +622,12 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - MutexGuard locked(JR->TheJIT->lock); + std::lock_guard guard(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. std::pair I = - JR->state.LookupFunctionFromCallSite(locked, Stub); + JR->state.LookupFunctionFromCallSite(Stub); F = I.second; ActualPtr = I.first; } @@ -661,7 +654,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { } // Reacquire the lock to update the GOT map. - MutexGuard locked(JR->TheJIT->lock); + std::lock_guard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! 
Multiple threads could be stuck, waiting to acquire the diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index e9ba96a6496f..f149edcb7b04 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -26,7 +26,6 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; @@ -66,7 +65,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, } MCJIT::~MCJIT() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -102,12 +101,12 @@ MCJIT::~MCJIT() { } void MCJIT::addModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); OwnedModules.addModule(M); } bool MCJIT::removeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return OwnedModules.removeModule(M); } @@ -129,12 +128,12 @@ void MCJIT::addArchive(object::Archive *A) { void MCJIT::setObjectCache(ObjectCache* NewCache) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ObjCache = NewCache; } ObjectBufferStream* MCJIT::emitObject(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added but not loaded to this // MCJIT instance, since these conditions are tested by our caller, @@ -174,7 +173,7 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) { void MCJIT::generateCodeForModule(Module *M) { // Get a thread lock to make sure we aren't trying to load multiple times - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. 
assert(OwnedModules.ownsModule(M) && @@ -214,7 +213,7 @@ void MCJIT::generateCodeForModule(Module *M) { } void MCJIT::finalizeLoadedModules() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Resolve any outstanding relocations. Dyld.resolveRelocations(); @@ -230,7 +229,7 @@ void MCJIT::finalizeLoadedModules() { // FIXME: Rename this. void MCJIT::finalizeObject() { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (ModulePtrSet::iterator I = OwnedModules.begin_added(), E = OwnedModules.end_added(); @@ -243,7 +242,7 @@ void MCJIT::finalizeObject() { } void MCJIT::finalizeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module."); @@ -268,7 +267,7 @@ uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { Module *MCJIT::findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If it hasn't already been generated, see if it's in one of our modules. for (ModulePtrSet::iterator I = OwnedModules.begin_added(), @@ -292,7 +291,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, uint64_t MCJIT::getSymbolAddress(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // First, check to see if we already have this symbol. 
uint64_t Addr = getExistingSymbolAddress(Name); @@ -336,7 +335,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, false); if (Result != 0) finalizeLoadedModules(); @@ -344,7 +343,7 @@ uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { } uint64_t MCJIT::getFunctionAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, true); if (Result != 0) finalizeLoadedModules(); @@ -353,7 +352,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) { // Deprecated. Use getFunctionAddress instead. void *MCJIT::getPointerToFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -552,13 +551,13 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name, void MCJIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); SmallVector::reverse_iterator I= std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { @@ -567,14 +566,14 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { } } void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); MemMgr.notifyObjectLoaded(this, &Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyObjectEmitted(Obj); } } void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned I 
= 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyFreeingObject(Obj); } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index aa26cff6a7b6..5851625383b9 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -278,35 +278,48 @@ bool Constant::canTrap() const { return canTrapImpl(this, NonTrappingOps); } -/// isThreadDependent - Return true if the value can vary between threads. -bool Constant::isThreadDependent() const { - SmallPtrSet Visited; - SmallVector WorkList; - WorkList.push_back(this); - Visited.insert(this); +/// Check if C contains a GlobalValue for which Predicate is true. +static bool +ConstHasGlobalValuePredicate(const Constant *C, + bool (*Predicate)(const GlobalValue *)) { + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(C); + Visited.insert(C); while (!WorkList.empty()) { - const Constant *C = WorkList.pop_back_val(); - - if (const GlobalVariable *GV = dyn_cast(C)) { - if (GV->isThreadLocal()) + const Constant *WorkItem = WorkList.pop_back_val(); + if (const auto *GV = dyn_cast(WorkItem)) + if (Predicate(GV)) return true; - } - - for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) { - const Constant *D = dyn_cast(C->getOperand(I)); - if (!D) + for (const Value *Op : WorkItem->operands()) { + const Constant *ConstOp = dyn_cast(Op); + if (!ConstOp) continue; - if (Visited.insert(D)) - WorkList.push_back(D); + if (Visited.insert(ConstOp)) + WorkList.push_back(ConstOp); } } - return false; } -/// isConstantUsed - Return true if the constant has users other than constant -/// exprs and other dangling things. +/// Return true if the value can vary between threads. 
+bool Constant::isThreadDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->isThreadLocal(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +bool Constant::isDLLImportDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->hasDLLImportStorageClass(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +/// Return true if the constant has users other than constant exprs and other +/// dangling things. bool Constant::isConstantUsed() const { for (const User *U : users()) { const Constant *UC = dyn_cast(U); diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 197b6cb9054e..68b9baa2baa2 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -2702,11 +2702,10 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM) { /*===-- Threading ------------------------------------------------------===*/ LLVMBool LLVMStartMultithreaded() { - return llvm_start_multithreaded(); + return LLVMIsMultithreaded(); } void LLVMStopMultithreaded() { - llvm_stop_multithreaded(); } LLVMBool LLVMIsMultithreaded() { diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 6eeb16220eac..27270636004f 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -128,7 +128,7 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { } bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const { - return getFunction().getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr; + return getDebugLoc().isUnknown() == false; } void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename, diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 160ddc9e81a7..5410cc031dad 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -59,7 +59,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { setDLLStorageClass(Src->getDLLStorageClass()); } -const GlobalObject *getBaseObject(const Constant &C) { +static 
const GlobalObject *getBaseObject(const Constant &C) { // FIXME: We should probably return a base + offset pair for non-zero GEPs. return dyn_cast(C.stripPointerCasts()); } diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index d3f3482dc024..d14f139db185 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -607,8 +607,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { // If P is an analysis pass and it is available then do not // generate the analysis again. Stale analysis info should not be // available at this point. - const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo(P->getPassID()); + const PassInfo *PI = P->getPassInfo(); if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) { delete P; return; @@ -723,8 +722,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { return *I; // If Pass not found then check the interfaces implemented by Immutable Pass - const PassInfo *PassInf = - PassRegistry::getPassRegistry()->getPassInfo(PI); + const PassInfo *PassInf = (*I)->getPassInfo(); assert(PassInf && "Expected all immutable passes to be initialized"); const std::vector &ImmPI = PassInf->getInterfacesImplemented(); @@ -766,8 +764,7 @@ void PMTopLevelManager::dumpArguments() const { dbgs() << "Pass Arguments: "; for (SmallVectorImpl::const_iterator I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) - if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) { + if (const PassInfo *PI = (*I)->getPassInfo()) { assert(PI && "Expected all immutable passes to be initialized"); if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); @@ -831,8 +828,8 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) { // This pass is the current implementation of all of the interfaces it // implements as well. 
- const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI); - if (!PInf) return; + const PassInfo *PInf = P->getPassInfo(); + if (PInf == 0) return; const std::vector &II = PInf->getInterfacesImplemented(); for (unsigned i = 0, e = II.size(); i != e; ++i) AvailableAnalysis[II[i]->getTypeInfo()] = P; @@ -963,10 +960,9 @@ void PMDataManager::freePass(Pass *P, StringRef Msg, P->releaseMemory(); } - AnalysisID PI = P->getPassID(); - if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) { + if (const PassInfo *PInf = P->getPassInfo()) { // Remove the pass itself (if it is not already removed). - AvailableAnalysis.erase(PI); + AvailableAnalysis.erase(P->getPassID()); // Remove all interfaces this pass implements, for which it is also // listed as the available implementation. @@ -1148,8 +1144,7 @@ void PMDataManager::dumpPassArguments() const { if (PMDataManager *PMD = (*I)->getAsPMDataManager()) PMD->dumpPassArguments(); else - if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) + if (const PassInfo *PI = (*I)->getPassInfo()) if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index d1c7f7d25c39..ad3c29c564e7 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -2233,7 +2233,8 @@ void Verifier::visitInstruction(Instruction &I) { } MDNode *MD = I.getMetadata(LLVMContext::MD_range); - Assert1(!MD || isa(I), "Ranges are only for loads!", &I); + Assert1(!MD || isa(I) || isa(I) || isa(I), + "Ranges are only for loads, calls and invokes!", &I); InstsInThisBlock.insert(&I); } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 9009958613ed..a1709f60fff2 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -124,23 +124,7 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { } void LTOCodeGenerator::setTargetOptions(TargetOptions options) { - 
Options.LessPreciseFPMADOption = options.LessPreciseFPMADOption; - Options.NoFramePointerElim = options.NoFramePointerElim; - Options.AllowFPOpFusion = options.AllowFPOpFusion; - Options.UnsafeFPMath = options.UnsafeFPMath; - Options.NoInfsFPMath = options.NoInfsFPMath; - Options.NoNaNsFPMath = options.NoNaNsFPMath; - Options.HonorSignDependentRoundingFPMathOption = - options.HonorSignDependentRoundingFPMathOption; - Options.UseSoftFloat = options.UseSoftFloat; - Options.FloatABIType = options.FloatABIType; - Options.NoZerosInBSS = options.NoZerosInBSS; - Options.GuaranteedTailCallOpt = options.GuaranteedTailCallOpt; - Options.DisableTailCalls = options.DisableTailCalls; - Options.StackAlignmentOverride = options.StackAlignmentOverride; - Options.TrapFuncName = options.TrapFuncName; - Options.PositionIndependentExecutable = options.PositionIndependentExecutable; - Options.UseInitArray = options.UseInitArray; + Options = options; } void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 8e205bb621e9..688b13f228d0 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1141,7 +1141,7 @@ bool ModuleLinker::linkModuleFlagsMetadata() { // Perform the merge for standard behavior types. switch (SrcBehaviorValue) { case Module::Require: - case Module::Override: assert(0 && "not possible"); break; + case Module::Override: llvm_unreachable("not possible"); case Module::Error: { // Emit an error if the values differ. 
if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 6a384c1a8e1c..78bd8c4ba144 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMC + ConstantPools.cpp ELFObjectWriter.cpp MCAsmBackend.cpp MCAsmInfo.cpp diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp new file mode 100644 index 000000000000..f979dad47da5 --- /dev/null +++ b/lib/MC/ConstantPools.cpp @@ -0,0 +1,95 @@ +//===- ConstantPools.cpp - ConstantPool class --*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ConstantPool and AssemblerConstantPools classes. +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/ConstantPools.h" + +using namespace llvm; +// +// ConstantPool implementation +// +// Emit the contents of the constant pool using the provided streamer. 
+void ConstantPool::emitEntries(MCStreamer &Streamer) { + if (Entries.empty()) + return; + Streamer.EmitCodeAlignment(4); // align to 4-byte address + Streamer.EmitDataRegion(MCDR_DataRegion); + for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + Streamer.EmitLabel(I->first); + Streamer.EmitValue(I->second, 4); + } + Streamer.EmitDataRegion(MCDR_DataRegionEnd); + Entries.clear(); +} + +const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) { + MCSymbol *CPEntryLabel = Context.CreateTempSymbol(); + + Entries.push_back(std::make_pair(CPEntryLabel, Value)); + return MCSymbolRefExpr::Create(CPEntryLabel, Context); +} + +bool ConstantPool::empty() { return Entries.empty(); } + +// +// AssemblerConstantPools implementation +// +ConstantPool * +AssemblerConstantPools::getConstantPool(const MCSection *Section) { + ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); + if (CP == ConstantPools.end()) + return nullptr; + + return &CP->second; +} + +ConstantPool & +AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) { + return ConstantPools[Section]; +} + +static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section, + ConstantPool &CP) { + if (!CP.empty()) { + Streamer.SwitchSection(Section); + CP.emitEntries(Streamer); + } +} + +void AssemblerConstantPools::emitAll(MCStreamer &Streamer) { + // Dump contents of assembler constant pools. 
+ for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(), + CPE = ConstantPools.end(); + CPI != CPE; ++CPI) { + const MCSection *Section = CPI->first; + ConstantPool &CP = CPI->second; + + emitConstantPool(Streamer, Section, CP); + } +} + +void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) { + const MCSection *Section = Streamer.getCurrentSection().first; + if (ConstantPool *CP = getConstantPool(Section)) { + emitConstantPool(Streamer, Section, *CP); + } +} + +const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer, + const MCExpr *Expr) { + const MCSection *Section = Streamer.getCurrentSection().first; + return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext()); +} diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index a98a13e0cd42..87f6ec0f3d13 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1574,8 +1574,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, break; default: - assert(0 && "FIXME: sh_type value not supported!"); - break; + llvm_unreachable("FIXME: sh_type value not supported!"); } if (TargetObjectWriter->getEMachine() == ELF::EM_ARM && diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index cc98be6ea63f..bd2c4e960ac4 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -340,6 +340,29 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { return !MCDwarfFiles[FileNumber].Name.empty(); } +/// finalizeDwarfSections - Emit end symbols for each non-empty code section. +/// Also remove empty sections from SectionStartEndSyms, to avoid generating +/// useless debug info for them. 
+void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { + MCContext &context = MCOS.getContext(); + + auto sec = SectionStartEndSyms.begin(); + while (sec != SectionStartEndSyms.end()) { + assert(sec->second.first && "Start symbol must be set by now"); + MCOS.SwitchSection(sec->first); + if (MCOS.mayHaveInstructions()) { + MCSymbol *SectionEndSym = context.CreateTempSymbol(); + MCOS.EmitLabel(SectionEndSym); + sec->second.second = SectionEndSym; + ++sec; + } else { + MapVector >::iterator + to_erase = sec; + sec = SectionStartEndSyms.erase(to_erase); + } + } +} + void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const { // If we have a source manager and a location, use it. Otherwise just // use the generic report_fatal_error(). diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index be6731abedd9..3bcff86b1c65 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -518,8 +519,12 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4); - EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); - EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1) { + EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4); + } else { + EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + } EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); if (!context.getCompilationDir().empty()) EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); @@ -552,20 +557,14 @@ static void 
EmitGenDwarfAbbrev(MCStreamer *MCOS) { } // When generating dwarf for assembly source files this emits the data for -// .debug_aranges section. Which contains a header and a table of pairs of -// PointerSize'ed values for the address and size of section(s) with line table -// entries (just the default .text in our case) and a terminating pair of zeros. +// .debug_aranges section. This section contains a header and a table of pairs +// of PointerSize'ed values for the address and size of section(s) with line +// table entries. static void EmitGenDwarfAranges(MCStreamer *MCOS, const MCSymbol *InfoSectionSymbol) { MCContext &context = MCOS->getContext(); - // Create a symbol at the end of the section that we are creating the dwarf - // debugging info to use later in here as part of the expression to calculate - // the size of the section for the table. - MCOS->SwitchSection(context.getGenDwarfSection()); - MCSymbol *SectionEndSym = context.CreateTempSymbol(); - MCOS->EmitLabel(SectionEndSym); - context.setGenDwarfSectionEndSym(SectionEndSym); + auto &Sections = context.getGenDwarfSectionSyms(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); @@ -583,8 +582,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, Length += Pad; // Add the size of the pair of PointerSize'ed values for the address and size - // of the one default .text section we have in the table. - Length += 2 * AddrSize; + // of each section we have in the table. + Length += 2 * AddrSize * Sections.size(); // And the pair of terminating zeros. Length += 2 * AddrSize; @@ -608,14 +607,21 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, for(int i = 0; i < Pad; i++) MCOS->EmitIntValue(0, 1); - // Now emit the table of pairs of PointerSize'ed values for the section(s) - // address and size, in our case just the one default .text section. 
- const MCExpr *Addr = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, - *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0); - MCOS->EmitValue(Addr, AddrSize); - MCOS->EmitAbsValue(Size, AddrSize); + // Now emit the table of pairs of PointerSize'ed values for the section + // addresses and sizes. + for (const auto &sec : Sections) { + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + const MCExpr *Addr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitValue(Addr, AddrSize); + MCOS->EmitAbsValue(Size, AddrSize); + } // And finally the pair of terminating zeros. MCOS->EmitIntValue(0, AddrSize); @@ -627,7 +633,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // DIE and a list of label DIEs. static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCSymbol *AbbrevSectionSymbol, - const MCSymbol *LineSectionSymbol) { + const MCSymbol *LineSectionSymbol, + const MCSymbol *RangesSectionSymbol) { MCContext &context = MCOS->getContext(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); @@ -674,15 +681,37 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(0, 4); } - // AT_low_pc, the first address of the default .text section. - const MCExpr *Start = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(Start, AddrSize); + if (RangesSectionSymbol) { + // There are multiple sections containing code, so we must use the + // .debug_ranges sections. - // AT_high_pc, the last address of the default .text section. 
- const MCExpr *End = MCSymbolRefExpr::Create( - context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(End, AddrSize); + // AT_ranges, the 4 byte offset from the start of the .debug_ranges section + // to the address range list for this compilation unit. + MCOS->EmitSymbolValue(RangesSectionSymbol, 4); + } else { + // If we only have one non-empty code section, we can use the simpler + // AT_low_pc and AT_high_pc attributes. + + // Find the first (and only) non-empty text section + auto &Sections = context.getGenDwarfSectionSyms(); + const auto TextSection = Sections.begin(); + assert(TextSection != Sections.end() && "No text section found"); + + MCSymbol *StartSymbol = TextSection->second.first; + MCSymbol *EndSymbol = TextSection->second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // AT_low_pc, the first address of the default .text section. + const MCExpr *Start = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(Start, AddrSize); + + // AT_high_pc, the last address of the default .text section. + const MCExpr *End = MCSymbolRefExpr::Create( + EndSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(End, AddrSize); + } // AT_name, the name of the source file. Reconstruct from the first directory // and file table entries. @@ -766,13 +795,51 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitLabel(InfoEnd); } +// When generating dwarf for assembly source files this emits the data for +// .debug_ranges section. We only emit one range list, which spans all of the +// executable sections of this file. 
+static void EmitGenDwarfRanges(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + auto &Sections = context.getGenDwarfSectionSyms(); + + const MCAsmInfo *AsmInfo = context.getAsmInfo(); + int AddrSize = AsmInfo->getPointerSize(); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + + for (const auto sec : Sections) { + + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // Emit a base address selection entry for the start of this section + const MCExpr *SectionStartAddr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitFill(AddrSize, 0xFF); + MCOS->EmitValue(SectionStartAddr, AddrSize); + + // Emit a range list entry spanning this section + const MCExpr *SectionSize = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitAbsValue(SectionSize, AddrSize); + } + + // Emit end of list entry + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitIntValue(0, AddrSize); +} + // // When generating dwarf for assembly source files this emits the Dwarf // sections. // void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { - // Create the dwarf sections in this order (.debug_line already created). MCContext &context = MCOS->getContext(); + + // Create the dwarf sections in this order (.debug_line already created). 
const MCAsmInfo *AsmInfo = context.getAsmInfo(); bool CreateDwarfSectionSymbols = AsmInfo->doesDwarfUseRelocationsAcrossSections(); @@ -781,6 +848,22 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0); MCSymbol *AbbrevSectionSymbol = nullptr; MCSymbol *InfoSectionSymbol = nullptr; + MCSymbol *RangesSectionSymbol = NULL; + + // Create end symbols for each section, and remove empty sections + MCOS->getContext().finalizeDwarfSections(*MCOS); + + // If there are no sections to generate debug info for, we don't need + // to do anything + if (MCOS->getContext().getGenDwarfSectionSyms().empty()) + return; + + // We only need to use the .debug_ranges section if we have multiple + // code sections. + const bool UseRangesSection = + MCOS->getContext().getGenDwarfSectionSyms().size() > 1; + CreateDwarfSectionSymbols |= UseRangesSection; + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); if (CreateDwarfSectionSymbols) { InfoSectionSymbol = context.CreateTempSymbol(); @@ -791,20 +874,30 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { AbbrevSectionSymbol = context.CreateTempSymbol(); MCOS->EmitLabel(AbbrevSectionSymbol); } - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + if (UseRangesSection) { + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + if (CreateDwarfSectionSymbols) { + RangesSectionSymbol = context.CreateTempSymbol(); + MCOS->EmitLabel(RangesSectionSymbol); + } + } - // If there are no line table entries then do not emit any section contents. - if (!context.hasMCLineSections()) - return; + assert((RangesSectionSymbol != NULL) || !UseRangesSection); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); // Output the data for .debug_aranges section. EmitGenDwarfAranges(MCOS, InfoSectionSymbol); + if (UseRangesSection) + EmitGenDwarfRanges(MCOS); + // Output the data for .debug_abbrev section. 
EmitGenDwarfAbbrev(MCOS); // Output the data for .debug_info section. - EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol); + EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, + RangesSectionSymbol); } // @@ -815,12 +908,13 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { // void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr, SMLoc &Loc) { - // We won't create dwarf labels for temporary symbols or symbols not in - // the default text. + // We won't create dwarf labels for temporary symbols. if (Symbol->isTemporary()) return; MCContext &context = MCOS->getContext(); - if (context.getGenDwarfSection() != MCOS->getCurrentSection().first) + // We won't create dwarf labels for symbols in sections that we are not + // generating debug info for. + if (!context.getGenDwarfSectionSyms().count(MCOS->getCurrentSection().first)) return; // The dwarf label's name does not have the symbol name's leading @@ -1270,7 +1364,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Version if (verboseAsm) streamer.AddComment("DW_CIE_VERSION"); - streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1); + // For DWARF2, we use CIE version 1 + // For DWARF3+, we use CIE version 3 + uint8_t CIEVersion = context.getDwarfVersion() <= 2 ? 
1 : 3; + streamer.EmitIntValue(CIEVersion, 1); // Augmentation String SmallString<8> Augmentation; @@ -1298,7 +1395,14 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Return Address Register if (verboseAsm) streamer.AddComment("CIE Return Address Column"); - streamer.EmitULEB128IntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true)); + if (CIEVersion == 1) { + assert(MRI->getRARegister() <= 255 && + "DWARF 2 encodes return_address_register in one byte"); + streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true), 1); + } else { + streamer.EmitULEB128IntValue( + MRI->getDwarfRegNum(MRI->getRARegister(), true)); + } // Augmentation Data Length (optional) @@ -1435,13 +1539,12 @@ namespace { return CIEKey(nullptr, -1, 0, false, false); } - CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_, - unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) : - Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), - LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), - IsSimple(IsSimple_) { - } - const MCSymbol* Personality; + CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_, + unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) + : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), + LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), + IsSimple(IsSimple_) {} + const MCSymbol *Personality; unsigned PersonalityEncoding; unsigned LsdaEncoding; bool IsSignalFrame; @@ -1516,7 +1619,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, Emitter.setSectionStart(SectionStart); MCSymbol *FDEEnd = nullptr; - DenseMap CIEStarts; + DenseMap CIEStarts; const MCSymbol *DummyDebugKey = nullptr; NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame(); diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 4f2740ed3ae9..5ab2829a6ba3 100644 --- a/lib/MC/MCNullStreamer.cpp +++ 
b/lib/MC/MCNullStreamer.cpp @@ -81,15 +81,7 @@ namespace { unsigned char Value = 0) override { return false; } void EmitFileDirective(StringRef Filename) override {} - unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename, - unsigned CUID = 0) override { - return 0; - } - void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, - unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator, - StringRef FileName) override {} + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo&) override {} void EmitBundleAlignMode(unsigned AlignPow2) override {} diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index bca516eca027..7991ef5fe09b 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -201,8 +201,8 @@ AsmToken AsmLexer::LexLineComment() { CurChar = getNextChar(); if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index cbff7beccae0..50375d37d841 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -345,8 +345,9 @@ class AsmParser : public MCAsmParser { DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE, DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC, - DK_IF, DK_IFNE, DK_IFB, DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, - DK_IFNDEF, DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF, + DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB, + DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF, + DK_ELSEIF, DK_ELSE, DK_ENDIF, DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, 
DK_STABS, DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA, DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER, @@ -433,8 +434,8 @@ class AsmParser : public MCAsmParser { bool parseDirectiveInclude(); // ".include" bool parseDirectiveIncbin(); // ".incbin" - // ".if" or ".ifne" - bool parseDirectiveIf(SMLoc DirectiveLoc); + // ".if", ".ifeq", ".ifge", ".ifgt" , ".ifle", ".iflt" or ".ifne" + bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind); // ".ifb" or ".ifnb", depending on ExpectBlank. bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); // ".ifc" or ".ifnc", depending on ExpectEqual. @@ -632,10 +633,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. if (getContext().getGenDwarfForAssembly()) { - getContext().setGenDwarfSection(getStreamer().getCurrentSection().first); MCSymbol *SectionStartSym = getContext().CreateTempSymbol(); getStreamer().EmitLabel(SectionStartSym); - getContext().setGenDwarfSectionStartSym(SectionStartSym); + auto InsertResult = getContext().addGenDwarfSection( + getStreamer().getCurrentSection().first); + assert(InsertResult.second && ".text section should not have debug info yet"); + InsertResult.first->second.first = SectionStartSym; getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective( 0, StringRef(), getContext().getMainFileName())); } @@ -811,7 +814,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // Parse symbol variant std::pair Split; if (!MAI.useParensForSymbolVariant()) { - Split = Identifier.split('@'); + if (FirstTokenKind == AsmToken::String) { + if (Lexer.is(AsmToken::At)) { + Lexer.Lex(); // eat @ + SMLoc AtLoc = getLexer().getLoc(); + StringRef VName; + if (parseIdentifier(VName)) + return Error(AtLoc, "expected symbol variant after '@'"); + + Split = std::make_pair(Identifier, 
VName); + } + } else { + Split = Identifier.split('@'); + } } else if (Lexer.is(AsmToken::LParen)) { Lexer.Lex(); // eat ( StringRef VName; @@ -1229,8 +1244,13 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { default: break; case DK_IF: + case DK_IFEQ: + case DK_IFGE: + case DK_IFGT: + case DK_IFLE: + case DK_IFLT: case DK_IFNE: - return parseDirectiveIf(IDLoc); + return parseDirectiveIf(IDLoc, DirKind); case DK_IFB: return parseDirectiveIfb(IDLoc, true); case DK_IFNB: @@ -1574,12 +1594,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { printMessage(IDLoc, SourceMgr::DK_Note, OS.str()); } - // If we are generating dwarf for assembly source files and the current - // section is the initial text section then generate a .loc directive for - // the instruction. + // If we are generating dwarf for the current section then generate a .loc + // directive for the instruction. if (!HadError && getContext().getGenDwarfForAssembly() && - getContext().getGenDwarfSection() == - getStreamer().getCurrentSection().first) { + getContext().getGenDwarfSectionSyms().count( + getStreamer().getCurrentSection().first)) { unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); @@ -2011,7 +2030,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, break; if (FAI >= NParameters) { - assert(M && "expected macro to be defined"); + assert(M && "expected macro to be defined"); Error(IDLoc, "parameter named '" + FA.Name + "' does not exist for macro '" + M->Name + "'"); @@ -3792,9 +3811,8 @@ bool AsmParser::parseDirectiveIncbin() { } /// parseDirectiveIf -/// ::= .if expression -/// ::= .ifne expression -bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { +/// ::= .if{,eq,ge,gt,le,lt,ne} expression +bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { @@ -3809,6 +3827,29 @@ bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { 
Lex(); + switch (DirKind) { + default: + llvm_unreachable("unsupported directive"); + case DK_IF: + case DK_IFNE: + break; + case DK_IFEQ: + ExprValue = ExprValue == 0; + break; + case DK_IFGE: + ExprValue = ExprValue >= 0; + break; + case DK_IFGT: + ExprValue = ExprValue > 0; + break; + case DK_IFLE: + ExprValue = ExprValue <= 0; + break; + case DK_IFLT: + ExprValue = ExprValue < 0; + break; + } + TheCondState.CondMet = ExprValue; TheCondState.Ignore = !TheCondState.CondMet; } @@ -4111,6 +4152,11 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK; DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK; DirectiveKindMap[".if"] = DK_IF; + DirectiveKindMap[".ifeq"] = DK_IFEQ; + DirectiveKindMap[".ifge"] = DK_IFGE; + DirectiveKindMap[".ifgt"] = DK_IFGT; + DirectiveKindMap[".ifle"] = DK_IFLE; + DirectiveKindMap[".iflt"] = DK_IFLT; DirectiveKindMap[".ifne"] = DK_IFNE; DirectiveKindMap[".ifb"] = DK_IFB; DirectiveKindMap[".ifnb"] = DK_IFNB; diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 78bb6c7aad0e..98b2b3bd60b2 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -150,7 +150,7 @@ class ELFAsmParser : public MCAsmParserExtension { private: bool ParseSectionName(StringRef &SectionName); - bool ParseSectionArguments(bool IsPush); + bool ParseSectionArguments(bool IsPush, SMLoc loc); unsigned parseSunStyleSectionFlags(); }; @@ -382,7 +382,7 @@ unsigned ELFAsmParser::parseSunStyleSectionFlags() { bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { getStreamer().PushSection(); - if (ParseSectionArguments(/*IsPush=*/true)) { + if (ParseSectionArguments(/*IsPush=*/true, loc)) { getStreamer().PopSection(); return true; } @@ -397,11 +397,11 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { } // FIXME: This is a work in progress. 
-bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { - return ParseSectionArguments(/*IsPush=*/false); +bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc loc) { + return ParseSectionArguments(/*IsPush=*/false, loc); } -bool ELFAsmParser::ParseSectionArguments(bool IsPush) { +bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { StringRef SectionName; if (ParseSectionName(SectionName)) @@ -545,10 +545,24 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) { } SectionKind Kind = computeSectionKind(Flags, Size); - getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, - Flags, Kind, Size, - GroupName), - Subsection); + const MCSection *ELFSection = getContext().getELFSection( + SectionName, Type, Flags, Kind, Size, GroupName); + getStreamer().SwitchSection(ELFSection, Subsection); + + if (getContext().getGenDwarfForAssembly()) { + auto &Sections = getContext().getGenDwarfSectionSyms(); + auto InsertResult = Sections.insert( + std::make_pair(ELFSection, std::make_pair(nullptr, nullptr))); + if (InsertResult.second) { + if (getContext().getDwarfVersion() <= 2) + Error(loc, "DWARF2 only supports one section per compilation unit"); + + MCSymbol *SectionStartSymbol = getContext().CreateTempSymbol(); + getStreamer().EmitLabel(SectionStartSymbol); + InsertResult.first->second.first = SectionStartSymbol; + } + } + return false; } diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 8e946d57f7fb..efd724a15df6 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -14,6 +14,7 @@ namespace llvm { MCTargetOptions::MCTargetOptions() : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), MCSaveTempLabels(false), MCUseDwarfDirectory(false), - ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {} + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false), + DwarfVersion(0) {} } // end namespace llvm diff --git a/lib/Object/ELFObjectFile.cpp 
b/lib/Object/ELFObjectFile.cpp index 356288ffc51b..0a3e2cb790d0 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -17,8 +17,8 @@ namespace llvm { using namespace object; -ErrorOr ObjectFile::createELFObjectFile(MemoryBuffer *Obj, - bool BufferOwned) { +static ErrorOr createELFObjectFileAux(MemoryBuffer *Obj, + bool BufferOwned) { std::pair Ident = getElfArchType(Obj); std::size_t MaxAlignment = 1ULL << countTrailingZeros(uintptr_t(Obj->getBufferStart())); @@ -82,4 +82,12 @@ ErrorOr ObjectFile::createELFObjectFile(MemoryBuffer *Obj, return R.release(); } +ErrorOr ObjectFile::createELFObjectFile(MemoryBuffer *Obj, + bool BufferOwned) { + ErrorOr Ret = createELFObjectFileAux(Obj, BufferOwned); + if (BufferOwned && Ret.getError()) + delete Obj; + return Ret; +} + } // end namespace llvm diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index 48820361d4ff..004d8de065d8 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -23,7 +23,8 @@ using namespace object; IRObjectFile::IRObjectFile(MemoryBuffer *Object, std::error_code &EC, LLVMContext &Context, bool BufferOwned) : SymbolicFile(Binary::ID_IR, Object, BufferOwned) { - ErrorOr MOrErr = parseBitcodeFile(Object, Context); + ErrorOr MOrErr = + getLazyBitcodeModule(Object, Context, /*BufferOwned*/ false); if ((EC = MOrErr.getError())) return; @@ -104,11 +105,21 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, return object_error::success; } +static bool isDeclaration(const GlobalValue &V) { + if (V.hasAvailableExternallyLinkage()) + return true; + + if (V.isMaterializable()) + return false; + + return V.isDeclaration(); +} + uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { const GlobalValue &GV = getGV(Symb); uint32_t Res = BasicSymbolRef::SF_None; - if (GV.isDeclaration() || GV.hasAvailableExternallyLinkage()) + if (isDeclaration(GV)) Res |= BasicSymbolRef::SF_Undefined; if (GV.hasPrivateLinkage()) Res |= 
BasicSymbolRef::SF_FormatSpecific; diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index e414de8bcf17..887e2bd0a34b 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -53,7 +53,7 @@ static T getUniversalBinaryStruct(const char *Ptr) { MachOUniversalBinary::ObjectForArch::ObjectForArch( const MachOUniversalBinary *Parent, uint32_t Index) : Parent(Parent), Index(Index) { - if (!Parent || Index > Parent->getNumberOfObjects()) { + if (!Parent || Index >= Parent->getNumberOfObjects()) { clear(); } else { // Parse object header. @@ -72,9 +72,7 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile( if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + std::string ObjectName = Parent->getFileName().str(); MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( ObjectData, ObjectName, false); ErrorOr Obj = ObjectFile::createMachOObjectFile(ObjBuffer); @@ -91,9 +89,7 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsArchive( if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + std::string ObjectName = Parent->getFileName().str(); MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( ObjectData, ObjectName, false); ErrorOr Obj = Archive::create(ObjBuffer); diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index a5ab8d747d87..28b42a8f8290 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -350,30 +350,27 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) { } Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const { - 
SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - BaseArgs.MakeIndex(Opt.getName()), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + BaseArgs.MakeIndex(Opt.getName()), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Value); - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); - SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index + 1), BaseArg)); + SynthesizedArgs.push_back( + make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index + 1), BaseArg)); return SynthesizedArgs.back().get(); } @@ -381,8 +378,7 @@ Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str()); SynthesizedArgs.push_back(make_unique( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); + Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, + BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); return SynthesizedArgs.back().get(); } diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 
2ef32b08ef28..ac4ff3eb5c66 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements atomic operations. +// This file implements atomic operations. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 9febb84f2b1d..2438d729d8de 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -89,7 +89,6 @@ add_llvm_library(LLVMSupport TimeValue.cpp Valgrind.cpp Watchdog.cpp - WindowsError.cpp ADDITIONAL_HEADERS Unix/Host.inc diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index b2bb7466c112..c2b739fa7360 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -145,6 +145,7 @@ void OptionCategory::registerCategory() { static void GetOptionInfo(SmallVectorImpl &PositionalOpts, SmallVectorImpl &SinkOpts, StringMap &OptionsMap) { + bool HadErrors = false; SmallVector OptionNames; Option *CAOpt = nullptr; // The ConsumeAfter option if it exists. for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { @@ -158,8 +159,9 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { // Add argument to the argument map! 
if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) { - errs() << ProgramName << ": CommandLine Error: Argument '" - << OptionNames[i] << "' defined more than once!\n"; + errs() << ProgramName << ": CommandLine Error: Option '" + << OptionNames[i] << "' registered more than once!\n"; + HadErrors = true; } } @@ -171,8 +173,10 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, else if (O->getMiscFlags() & cl::Sink) // Remember sink options SinkOpts.push_back(O); else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) { - if (CAOpt) + if (CAOpt) { O->error("Cannot specify more than one option with cl::ConsumeAfter!"); + HadErrors = true; + } CAOpt = O; } } @@ -182,6 +186,12 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, // Make sure that they are in order of registration not backwards. std::reverse(PositionalOpts.begin(), PositionalOpts.end()); + + // Fail hard if there were errors. These are strictly unrecoverable and + // indicate serious issues such as conflicting option names or an incorrectly + // linked LLVM distribution. + if (HadErrors) + report_fatal_error("inconsistency in registered CommandLine options"); } @@ -1699,7 +1709,7 @@ class VersionPrinter { OS << "LLVM (http://llvm.org/):\n" << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; #ifdef LLVM_VERSION_INFO - OS << LLVM_VERSION_INFO; + OS << " " << LLVM_VERSION_INFO; #endif OS << "\n "; #ifndef __OPTIMIZE__ diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 82d7c0cc6d19..d2b551e8a0a6 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system DynamicLibrary concept. +// This file implements the operating system DynamicLibrary concept. // // FIXME: This file leaks ExplicitSymbols and OpenedHandles! 
// diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index ae1d64db4b82..c36007f55999 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -18,10 +18,12 @@ #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -134,3 +136,70 @@ void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) { void LLVMResetFatalErrorHandler() { remove_fatal_error_handler(); } + +#ifdef LLVM_ON_WIN32 + +#include + +// I'd rather not double the line count of the following. +#define MAP_ERR_TO_COND(x, y) \ + case x: \ + return make_error_code(errc::y) + +std::error_code llvm::mapWindowsError(unsigned EV) { + switch (EV) { + MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); + MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); + MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); + MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); + MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); + MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); + MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); + MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); + MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); + MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); + MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); + MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); + 
MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); + MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); + MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); + MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); + MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); + MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); + MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); + MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); + MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_SEEK, io_error); + MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); + MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); + MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); + MAP_ERR_TO_COND(WSAEACCES, permission_denied); + MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); + MAP_ERR_TO_COND(WSAEFAULT, bad_address); + MAP_ERR_TO_COND(WSAEINTR, interrupted); + MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); + MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); + MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); + default: + return std::error_code(EV, std::system_category()); + } +} + +#endif diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index ce0a3b6bed77..03187e97eef7 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -7,7 +7,7 @@ // 
//===----------------------------------------------------------------------===// // -// This header file implements the operating system Host concept. +// This file implements the operating system Host concept. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Makefile b/lib/Support/Makefile index 4a2185d589e5..39426aaaacee 100644 --- a/lib/Support/Makefile +++ b/lib/Support/Makefile @@ -17,3 +17,7 @@ include $(LEVEL)/Makefile.common CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts)) CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts)) + +ifdef LLVM_VERSION_INFO +CompileCommonOpts += -DLLVM_VERSION_INFO='"$(LLVM_VERSION_INFO)"' +endif diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 6a1c2a545a8d..da01195a9df6 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -15,15 +15,32 @@ #include "llvm/Config/config.h" #include "llvm/Support/Atomic.h" #include +#include using namespace llvm; static const ManagedStaticBase *StaticList = nullptr; +// ManagedStatics can get created during execution of static constructors. As a +// result, we cannot use a global static std::mutex object for the lock since it +// may not have been constructed. Instead, we do a call-once initialization of +// a pointer to a mutex. +static std::once_flag MutexInitializationFlag; +static std::recursive_mutex* ManagedStaticMutex = nullptr; + +// Not all supported platforms (in particular VS2012) have thread-safe function +// static initialization, so roll our own. 
+static std::recursive_mutex& GetManagedStaticMutex() { + std::call_once(MutexInitializationFlag, + []() { ManagedStaticMutex = new std::recursive_mutex(); } ); + + return *ManagedStaticMutex; +} + void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { assert(Creator); if (llvm_is_multithreaded()) { - llvm_acquire_global_lock(); + std::lock_guard Lock(GetManagedStaticMutex()); if (!Ptr) { void* tmp = Creator(); @@ -43,8 +60,6 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), Next = StaticList; StaticList = this; } - - llvm_release_global_lock(); } else { assert(!Ptr && !DeleterFn && !Next && "Partially initialized ManagedStatic!?"); @@ -75,8 +90,8 @@ void ManagedStaticBase::destroy() const { /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. void llvm::llvm_shutdown() { + std::lock_guard Lock(GetManagedStaticMutex()); + while (StaticList) StaticList->destroy(); - - if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 15edf0ddbbd8..535394c5b3ba 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -846,6 +846,40 @@ std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { return create_directory(P, IgnoreExisting); } +std::error_code copy_file(const Twine &From, const Twine &To) { + int ReadFD, WriteFD; + if (std::error_code EC = openFileForRead(From, ReadFD)) + return EC; + if (std::error_code EC = openFileForWrite(To, WriteFD, F_None)) { + close(ReadFD); + return EC; + } + + const size_t BufSize = 4096; + char *Buf = new char[BufSize]; + int BytesRead = 0, BytesWritten = 0; + for (;;) { + BytesRead = read(ReadFD, Buf, BufSize); + if (BytesRead <= 0) + break; + while (BytesRead) { + BytesWritten = write(WriteFD, Buf, BytesRead); + if (BytesWritten < 0) + break; + BytesRead -= BytesWritten; + } + if (BytesWritten < 0) + break; + } + close(ReadFD); + close(WriteFD); + delete[] Buf; + + if 
(BytesRead < 0 || BytesWritten < 0) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + bool exists(file_status status) { return status_known(status) && status.type() != file_type::file_not_found; } diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index 0380ed955dd5..087c459e187e 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Process concept. +// This file implements the operating system Process concept. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index eb700e3a8591..b84b82b1f10b 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Program concept. +// This file implements the operating system Program concept. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 1acfa79b11d5..ca7f3f64aa37 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements llvm_start_multithreaded() and friends. +// This file defines helper functions for running LLVM in a multi-threaded +// environment. 
// //===----------------------------------------------------------------------===// @@ -19,50 +20,14 @@ using namespace llvm; -static bool multithreaded_mode = false; - -static sys::Mutex* global_lock = nullptr; - -bool llvm::llvm_start_multithreaded() { +bool llvm::llvm_is_multithreaded() { #if LLVM_ENABLE_THREADS != 0 - assert(!multithreaded_mode && "Already multithreaded!"); - multithreaded_mode = true; - global_lock = new sys::Mutex(true); - - // We fence here to ensure that all initialization is complete BEFORE we - // return from llvm_start_multithreaded(). - sys::MemoryFence(); return true; #else return false; #endif } -void llvm::llvm_stop_multithreaded() { -#if LLVM_ENABLE_THREADS != 0 - assert(multithreaded_mode && "Not currently multithreaded!"); - - // We fence here to insure that all threaded operations are complete BEFORE we - // return from llvm_stop_multithreaded(). - sys::MemoryFence(); - - multithreaded_mode = false; - delete global_lock; -#endif -} - -bool llvm::llvm_is_multithreaded() { - return multithreaded_mode; -} - -void llvm::llvm_acquire_global_lock() { - if (multithreaded_mode) global_lock->acquire(); -} - -void llvm::llvm_release_global_lock() { - if (multithreaded_mode) global_lock->release(); -} - #if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 61465ae5e8be..210bda754e74 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -84,14 +85,13 @@ static TimerGroup *getDefaultTimerGroup() { sys::MemoryFence(); if (tmp) return tmp; - llvm_acquire_global_lock(); + sys::SmartScopedLock Lock(*TimerLock); tmp = DefaultTimerGroup; if (!tmp) { tmp = new TimerGroup("Miscellaneous Ungrouped Timers"); 
sys::MemoryFence(); DefaultTimerGroup = tmp; } - llvm_release_global_lock(); return tmp; } diff --git a/lib/Support/WindowsError.cpp b/lib/Support/WindowsError.cpp deleted file mode 100644 index 18fdbbda9f1c..000000000000 --- a/lib/Support/WindowsError.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===-- WindowsError.cpp - Support for mapping windows errors to posix-----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a mapping from windows errors to posix ones. -// The standard doesn't define what the equivalence is from system -// errors to generic ones. The one implemented in msvc is too conservative -// for llvm, so we do an extra mapping when constructing an error_code -// from an windows error. This allows the rest of llvm to simple checks -// like "EC == errc::file_exists" and have it work on both posix and -// windows. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Errc.h" -#include "llvm/Support/WindowsError.h" - -#include "llvm/Config/llvm-config.h" - -#ifdef LLVM_ON_WIN32 - -#include - - -// I'd rather not double the line count of the following. 
-#define MAP_ERR_TO_COND(x, y) \ - case x: \ - return make_error_code(errc::y) - -std::error_code llvm::mapWindowsError(unsigned EV) { - switch (EV) { - MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); - MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); - MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); - MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); - MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); - MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); - MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); - MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); - MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); - MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); - MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); - MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); - MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); - MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); - MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); - MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); - MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); - MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); - MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); - MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); - MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); - MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); - MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); - MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); - MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); - MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); - MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); - MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); - 
MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); - MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); - MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); - MAP_ERR_TO_COND(ERROR_SEEK, io_error); - MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); - MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); - MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); - MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); - MAP_ERR_TO_COND(WSAEACCES, permission_denied); - MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); - MAP_ERR_TO_COND(WSAEFAULT, bad_address); - MAP_ERR_TO_COND(WSAEINTR, interrupted); - MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); - MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); - MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); - default: - return std::error_code(EV, std::system_category()); - } -} - -#endif diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 935d674a3603..fb702187d13a 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMTableGen Error.cpp Main.cpp Record.cpp + SetTheory.cpp StringMatcher.cpp TableGenBackend.cpp TGLexer.cpp diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index c553a21c261e..f7843dc8360b 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -811,20 +811,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } case HEAD: { if (ListInit *LHSl = dyn_cast(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in car"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in car"); return LHSl->getElement(0); } break; } case TAIL: { if (ListInit *LHSl = dyn_cast(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in cdr"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in 
cdr"); // Note the +1. We can't just pass the result of getValues() // directly. ArrayRef::iterator begin = LHSl->getValues().begin()+1; diff --git a/utils/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp similarity index 99% rename from utils/TableGen/SetTheory.cpp rename to lib/TableGen/SetTheory.cpp index 5ead7ed16fda..c99c2bab45ab 100644 --- a/utils/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "SetTheory.h" #include "llvm/Support/Format.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index c3ee9bbb8179..cd94e244dc38 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -211,7 +211,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { default: - assert(0 && ""); + llvm_unreachable(""); case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp index 52094526727d..484e7e8ffce4 100644 --- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -291,7 +291,7 @@ static bool isConditionalBranch(unsigned Opc) { static MachineBasicBlock *getDestBlock(MachineInstr *MI) { switch (MI->getOpcode()) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBZW: case AArch64::TBNZW: case AArch64::TBZX: @@ -309,7 +309,7 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) { static unsigned getOppositeConditionOpcode(unsigned Opc) { switch (Opc) { default: - 
assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: return AArch64::TBZW; case AArch64::TBNZX: return AArch64::TBZX; case AArch64::TBZW: return AArch64::TBNZW; @@ -325,7 +325,7 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) { static unsigned getBranchDisplacementBits(unsigned Opc) { switch (Opc) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: case AArch64::TBZW: case AArch64::TBNZX: diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7dbcb7bf5ee2..98609760a73a 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2108,7 +2108,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { .getVectorElementType() .getSizeInBits()) { default: - assert(0 && "Unexpected vector element type!"); + llvm_unreachable("Unexpected vector element type!"); case 64: SubReg = AArch64::dsub; break; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7a2c9c95b615..bb5290208cb5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, #ifndef NDEBUG MI->dump(); #endif - assert(0 && "Unexpected instruction for custom inserter!"); - break; + llvm_unreachable("Unexpected instruction for custom inserter!"); case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); @@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); } - llvm_unreachable("Unexpected instruction for custom inserter!"); } //===----------------------------------------------------------------------===// @@ -1273,7 +1271,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { bool ExtraOp = false; switch 
(Op.getOpcode()) { default: - assert(0 && "Invalid code"); + llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = AArch64ISD::ADDS; break; @@ -4110,6 +4108,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { // shuffle in combination with VEXTs. SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4158,35 +4157,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; int VEXTOffsets[2] = { 0, 0 }; + int OffsetMultipliers[2] = { 1, 1 }; // This loop extracts the usage patterns of the source vectors // and prepares appropriate SDValues for a shuffle if possible. for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { + unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements(); + SDValue CurSource = SourceVecs[i]; + if (SourceVecs[i].getValueType().getVectorElementType() != + VT.getVectorElementType()) { + // It may hit this case if SourceVecs[i] is AssertSext/AssertZext. + // Then bitcast it to the vector which holds asserted element type, + // and record the multiplier of element width between SourceVecs and + // Build_vector which is needed to extract the correct lanes later. 
+ EVT CastVT = + EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + SourceVecs[i].getValueSizeInBits() / + VT.getVectorElementType().getSizeInBits()); + + CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]); + OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts; + NumSrcElts *= OffsetMultipliers[i]; + MaxElts[i] *= OffsetMultipliers[i]; + MinElts[i] *= OffsetMultipliers[i]; + } + + if (CurSource.getValueType() == VT) { // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; + ShuffleSrcs[i] = CurSource; VEXTOffsets[i] = 0; continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { + } else if (NumSrcElts < NumElts) { // We can pad out the smaller vector for free, so if it's part of a // shuffle... - ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i], - DAG.getUNDEF(SourceVecs[i].getValueType())); + ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource, + DAG.getUNDEF(CurSource.getValueType())); continue; } - // Don't attempt to extract subvectors from BUILD_VECTOR sources - // that expand or trunc the original value. - // TODO: We can try to bitcast and ANY_EXTEND the result but - // we need to consider the cost of vector ANY_EXTEND, and the - // legality of all the types. - if (SourceVecs[i].getValueType().getVectorElementType() != - VT.getVectorElementType()) - return SDValue(); - // Since only 64-bit and 128-bit vectors are legal on ARM and // we've eliminated the other cases... 
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && + assert(NumSrcElts == 2 * NumElts && "unexpected vector sizes in ReconstructShuffle"); if (MaxElts[i] - MinElts[i] >= NumElts) { @@ -4197,22 +4208,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, if (MinElts[i] >= NumElts) { // The extraction can just take the second half VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); } else if (MaxElts[i] < NumElts) { // The extraction can just take the first half VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); } else { // An actual VEXT is needed VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); + SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); @@ -4232,9 +4241,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, int ExtractElt = cast(Op.getOperand(i).getOperand(1))->getSExtValue(); if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); + Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]); } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); + 
Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts - + VEXTOffsets[1]); } } @@ -6674,7 +6684,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, else if (Vec.getValueType() == MVT::v2i64) VecResTy = MVT::v2f64; else - assert(0 && "unexpected vector type!"); + llvm_unreachable("unexpected vector type!"); SDValue Convert = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 8cec4a148070..ce85b2ceba95 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -35,8 +35,14 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MCInstrDesc &Desc = MI->getDesc(); + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + if (MI->getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); + const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.getOpcode()) { default: // Anything not explicitly designated otherwise is a nomal 4-byte insn. @@ -1835,7 +1841,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUnscaledOp = 0; switch (MI.getOpcode()) { default: - assert(0 && "unhandled opcode in rewriteAArch64FrameIndex"); + llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); // Vector spills/fills can't take an immediate offset. 
case AArch64::LD1Twov2d: case AArch64::LD1Threev2d: diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index ab6d37532a70..75a17b9dc999 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -51,7 +51,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; else - assert(0 && "Unexpected target flags with MO_GOT on GV operand"); + llvm_unreachable("Unexpected target flags with MO_GOT on GV operand"); } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) { if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_TLVPPAGE; @@ -154,7 +154,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { switch (MO.getType()) { default: - assert(0 && "unknown operand type"); + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all implicit register operands. 
if (MO.isImplicit()) diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f861df0bf99f..5d363a00dc0f 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3447,8 +3447,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { case Match_MnemonicFail: return Error(Loc, "unrecognized instruction mnemonic"); default: - assert(0 && "unexpected error code!"); - return Error(Loc, "invalid instruction format"); + llvm_unreachable("unexpected error code!"); } } diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 24663684a3fd..2057c51346af 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) { case LLVMDisassembler_VariantKind_ARM64_TLVP: case LLVMDisassembler_VariantKind_ARM64_TLVOFF: default: - assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; + llvm_unreachable("bad LLVMDisassembler_VariantKind"); } } diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index f484a5b1bdcc..8a21f0650cdc 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -918,7 +918,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, else O << getRegisterName(Reg); } else - assert(0 && "unknown operand kind in printPostIncOperand64"); + llvm_unreachable("unknown operand kind in printPostIncOperand64"); } void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, @@ -1109,7 +1109,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { while (Stride--) { switch 
(Reg) { default: - assert(0 && "Vector register expected!"); + llvm_unreachable("Vector register expected!"); case AArch64::Q0: Reg = AArch64::Q1; break; case AArch64::Q1: Reg = AArch64::Q2; break; case AArch64::Q2: Reg = AArch64::Q3; break; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index d8900d4fceb2..a917616f142d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -86,7 +86,7 @@ class AArch64AsmBackend : public MCAsmBackend { static unsigned getFixupKindNumBytes(unsigned Kind) { switch (Kind) { default: - assert(0 && "Unknown fixup kind!"); + llvm_unreachable("Unknown fixup kind!"); case AArch64::fixup_aarch64_tlsdesc_call: return 0; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 464a18cdbc04..f0513575edb4 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - else { - assert(MO.isImm() && "did not expect relocated expression"); - return static_cast(MO.getImm()); - } - assert(0 && "Unable to encode MCOperand!"); - return 0; + assert(MO.isImm() && "did not expect relocated expression"); + return static_cast(MO.getImm()); } template uint32_t diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 5c86189a6ef5..ba9536676e12 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -75,7 +75,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( Log2Size = 
llvm::Log2_32(4); switch (Sym->getKind()) { default: - assert(0 && "Unexpected symbol reference variant kind!"); + llvm_unreachable("Unexpected symbol reference variant kind!"); case MCSymbolRefExpr::VK_PAGEOFF: RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12); return true; diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 94faf6f60db3..92eaf9e1c9b3 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -321,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); } - assert(0 && "Unhandled update pattern!"); - return 0; + llvm_unreachable("Unhandled update pattern!"); } // Return true if this MachineInstr inserts a scalar (SPR) value into diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 55e9fe5f5c57..d2c54f3b71b6 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -730,6 +730,32 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + if (MMI) { + if (const Module *SourceModule = MMI->getModule()) { + // ABI_PCS_wchar_t to indicate wchar_t width + // FIXME: There is no way to emit value 0 (wchar_t prohibited). + if (auto WCharWidthValue = cast_or_null( + SourceModule->getModuleFlag("wchar_size"))) { + int WCharWidth = WCharWidthValue->getZExtValue(); + assert((WCharWidth == 2 || WCharWidth == 4) && + "wchar_t width must be 2 or 4 bytes"); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth); + } + + // ABI_enum_size to indicate enum width + // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 + // (all enums contain a value needing 32 bits to encode). 
+ if (auto EnumWidthValue = cast_or_null( + SourceModule->getModuleFlag("min_enum_size"))) { + int EnumWidth = EnumWidthValue->getZExtValue(); + assert((EnumWidth == 1 || EnumWidth == 4) && + "Minimum enum width must be 1 or 4 bytes"); + int EnumBuildAttr = EnumWidth == 1 ? 1 : 2; + ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); + } + } + } + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZVirtualization); diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 2fd7eddd8748..5fb6ebfeaae1 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,6 +15,7 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4a762ce6d3bd..cf849951088a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7147,7 +7147,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, // thumb-2 environment, so there is no interworking required. As a result, we // do not expect a veneer to be emitted by the linker, clobbering IP. // - // Each module recieves its own copy of __chkstk, so no import thunk is + // Each module receives its own copy of __chkstk, so no import thunk is // required, again, ensuring that IP is not clobbered. // // Finally, although some linkers may theoretically provide a trampoline for diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b44a19b01c5f..03eac2e0c84f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -493,7 +493,7 @@ def neon_vcvt_imm32 : Operand { // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. 
def rot_imm_XFORM: SDNodeXFormgetZExtValue()){ - default: assert(0); + default: llvm_unreachable(nullptr); case 0: return CurDAG->getTargetConstant(0, MVT::i32); case 8: return CurDAG->getTargetConstant(1, MVT::i32); case 16: return CurDAG->getTargetConstant(2, MVT::i32); diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8821c2dd09f8..6d1114d51aab 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -13,6 +13,7 @@ #include "ARMJITInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" @@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, } } } + +void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo(); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index ee4c863543e7..27e2a2013404 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -14,7 +14,6 @@ #ifndef ARMJITINFO_H #define ARMJITINFO_H -#include "ARMMachineFunctionInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -103,12 +102,7 @@ namespace llvm { /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize /// jump table ids to jump table bases map; remember if codegen relocation /// model is PIC. 
- void Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; - } + void Initialize(const MachineFunction &MF, bool isPIC); /// getConstantPoolEntryAddr - The ARM target puts all constant /// pool entries into constant islands. This returns the address of the diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a91bb972fb12..a03bcdbddd79 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1735,8 +1735,10 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { isThumb1 = AFI->isThumbFunction() && !isThumb2; // FIXME: Temporarily disabling for Thumb-1 due to miscompiles - if (isThumb1) + if (isThumb1) { + delete RS; return false; + } bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp index af445e2f35aa..892b269fc181 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -12,3 +12,13 @@ using namespace llvm; void ARMFunctionInfo::anchor() { } + +ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) + : isThumb(MF.getTarget().getSubtarget().isThumb()), + hasThumb2(MF.getTarget().getSubtarget().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0), + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), + GlobalBaseReg(0) {} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index d7ec6eba941c..44a9e3495b90 100644 --- 
a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -130,16 +130,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} - explicit ARMFunctionInfo(MachineFunction &MF) : - isThumb(MF.getTarget().getSubtarget().isThumb()), - hasThumb2(MF.getTarget().getSubtarget().hasThumb2()), - StByValParamsPadding(0), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), - LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} + explicit ARMFunctionInfo(MachineFunction &MF); bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -220,7 +211,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) - assert(0 && "Duplicate entries!"); + llvm_unreachable("Duplicate entries!"); } unsigned getOriginalCPIdx(unsigned CloneIdx) const { diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index fc842512ef00..e7d699c75614 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -148,7 +148,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL) {} + TSInfo(DL), JITInfo() {} void ARMSubtarget::initializeEnvironment() { HasV4TOps = false; @@ -417,6 +417,10 @@ bool ARMSubtarget::enablePostMachineScheduler() const { return PostRAScheduler; } +bool 
ARMSubtarget::enableAtomicExpandLoadLinked() const { + return hasAnyDataBarrier() && !isThumb1Only(); +} + bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 7da80ec0d494..c13ef865389e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,6 +14,7 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" @@ -256,10 +257,12 @@ class ARMSubtarget : public ARMGenSubtargetInfo { const DataLayout *getDataLayout() const { return &DL; } const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + ARMJITInfo *getJITInfo() { return &JITInfo; } private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; + ARMJITInfo JITInfo; void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); @@ -422,6 +425,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo { TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const override; + // enableAtomicExpandLoadLinked - True if we need to expand our atomics. + bool enableAtomicExpandLoadLinked() const override; + /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. 
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index a93824230d70..434d3b03ae48 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -52,7 +52,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle, Options), JITInfo() { + Subtarget(TT, CPU, FS, isLittle, Options) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) @@ -171,16 +171,15 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } void ARMPassConfig::addIRPasses() { - const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { - addPass(createAtomicExpandLoadLinkedPass(TM)); + addPass(createAtomicExpandLoadLinkedPass(TM)); - // Cmpxchg instructions are often used with a subsequent comparison to - // determine whether it succeeded. We can exploit existing control-flow in - // ldrex/strex loops to simplify this, but it needs tidying up. + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. 
+ const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass()); - } TargetPassConfig::addIRPasses(); } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 154927786082..737c2fae1a80 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -32,10 +32,6 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { protected: ARMSubtarget Subtarget; - -private: - ARMJITInfo JITInfo; - public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -44,7 +40,6 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { CodeGenOpt::Level OL, bool isLittle); - ARMJITInfo *getJITInfo() override { return &JITInfo; } const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } const ARMTargetLowering *getTargetLowering() const override { // Implemented by derived classes @@ -56,6 +51,8 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { const DataLayout *getDataLayout() const override { return getSubtargetImpl()->getDataLayout(); } + ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + /// \brief Register ARM analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 57df7da7f310..a2ace629baa1 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only handle costs of reverse shuffles for now. - if (Kind != SK_Reverse) + // We only handle costs of reverse and alternate shuffles for now. 
+ if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - static const CostTblEntry NEONShuffleTbl[] = { - // Reverse shuffle cost one instruction if we are shuffling within a double - // word (vrev) or two if we shuffle a quad word (vrev, vext). - { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, - - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } - }; + if (Kind == SK_Reverse) { + static const CostTblEntry NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - std::pair LT = TLI->getTypeLegalizationCost(Tp); + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + std::pair LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * NEONShuffleTbl[Idx].Cost; + return LT.first * NEONShuffleTbl[Idx].Cost; + } + if (Kind == SK_Alternate) { + static const CostTblEntry NEONAltShuffleTbl[] = { + // Alt shuffle cost table for ARM. Cost is the number of instructions + // required to create the shuffled vector. 
+ + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; + + std::pair LT = TLI->getTypeLegalizationCost(Tp); + int Idx = + CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * NEONAltShuffleTbl[Idx].Cost; + } + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 5b51a52f828a..b8ee55574972 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1047,8 +1047,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, // we have a movt or a movw, but that led to misleadingly results. // This is now disallowed in the the AsmParser in validateInstruction() // so this should never happen. 
- assert(0 && "expression without :upper16: or :lower16:"); - return 0; + llvm_unreachable("expression without :upper16: or :lower16:"); } uint32_t ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index e3cfb05b379d..0cb795ba3ccc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -11,147 +11,12 @@ // //===----------------------------------------------------------------------===// #include "llvm/ADT/MapVector.h" +#include "llvm/MC/ConstantPools.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; - -namespace { -// A class to keep track of assembler-generated constant pools that are use to -// implement the ldr-pseudo. -class ConstantPool { - typedef SmallVector, 4> EntryVecTy; - EntryVecTy Entries; - -public: - // Initialize a new empty constant pool - ConstantPool() {} - - // Add a new entry to the constant pool in the next slot. - // \param Value is the new entry to put in the constant pool. - // - // \returns a MCExpr that references the newly inserted value - const MCExpr *addEntry(const MCExpr *Value, MCContext &Context); - - // Emit the contents of the constant pool using the provided streamer. - void emitEntries(MCStreamer &Streamer); - - // Return true if the constant pool is empty - bool empty(); -}; -} - -namespace llvm { -class AssemblerConstantPools { - // Map type used to keep track of per-Section constant pools used by the - // ldr-pseudo opcode. The map associates a section to its constant pool. The - // constant pool is a vector of (label, value) pairs. When the ldr - // pseudo is parsed we insert a new (label, value) pair into the constant pool - // for the current section and add MCSymbolRefExpr to the new label as - // an opcode to the ldr. 
After we have parsed all the user input we - // output the (label, value) pairs in each constant pool at the end of the - // section. - // - // We use the MapVector for the map type to ensure stable iteration of - // the sections at the end of the parse. We need to iterate over the - // sections in a stable order to ensure that we have print the - // constant pools in a deterministic order when printing an assembly - // file. - typedef MapVector ConstantPoolMapTy; - ConstantPoolMapTy ConstantPools; - -public: - AssemblerConstantPools() {} - ~AssemblerConstantPools() {} - - void emitAll(MCStreamer &Streamer); - void emitForCurrentSection(MCStreamer &Streamer); - const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr); - -private: - ConstantPool *getConstantPool(const MCSection *Section); - ConstantPool &getOrCreateConstantPool(const MCSection *Section); -}; -} - -// -// ConstantPool implementation -// -// Emit the contents of the constant pool using the provided streamer. -void ConstantPool::emitEntries(MCStreamer &Streamer) { - if (Entries.empty()) - return; - Streamer.EmitCodeAlignment(4); // align to 4-byte address - Streamer.EmitDataRegion(MCDR_DataRegion); - for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end(); - I != E; ++I) { - Streamer.EmitLabel(I->first); - Streamer.EmitValue(I->second, 4); - } - Streamer.EmitDataRegion(MCDR_DataRegionEnd); - Entries.clear(); -} - -const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) { - MCSymbol *CPEntryLabel = Context.CreateTempSymbol(); - - Entries.push_back(std::make_pair(CPEntryLabel, Value)); - return MCSymbolRefExpr::Create(CPEntryLabel, Context); -} - -bool ConstantPool::empty() { return Entries.empty(); } - -// -// AssemblerConstantPools implementation -// -ConstantPool * -AssemblerConstantPools::getConstantPool(const MCSection *Section) { - ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); - if (CP == ConstantPools.end()) - return nullptr; - - return 
&CP->second; -} - -ConstantPool & -AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) { - return ConstantPools[Section]; -} - -static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section, - ConstantPool &CP) { - if (!CP.empty()) { - Streamer.SwitchSection(Section); - CP.emitEntries(Streamer); - } -} - -void AssemblerConstantPools::emitAll(MCStreamer &Streamer) { - // Dump contents of assembler constant pools. - for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(), - CPE = ConstantPools.end(); - CPI != CPE; ++CPI) { - const MCSection *Section = CPI->first; - ConstantPool &CP = CPI->second; - - emitConstantPool(Streamer, Section, CP); - } -} - -void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) { - const MCSection *Section = Streamer.getCurrentSection().first; - if (ConstantPool *CP = getConstantPool(Section)) { - emitConstantPool(Streamer, Section, *CP); - } -} - -const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer, - const MCExpr *Expr) { - const MCSection *Section = Streamer.getCurrentSection().first; - return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext()); -} - // // ARMTargetStreamer Implemenation // diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index ea6367a89bce..1c95e06c8923 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1538,14 +1538,13 @@ int HexagonInstrInfo::GetDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); - if (NewOp < 0) - assert(0 && "Couldn't change predicate new instruction to its old form."); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); } if (isNewValueStore(NewOp)) { // Convert into non-new-value format NewOp = Hexagon::getNonNVStore(NewOp); - if (NewOp < 0) - assert(0 && 
"Couldn't change new-value store to its old form."); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } return NewOp; } diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 3133a51b3f9e..dc9ed2feeb9c 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -116,14 +116,20 @@ class MipsAsmParser : public MCTargetAsmParser { bool needsExpansion(MCInst &Inst); - void expandInstruction(MCInst &Inst, SMLoc IDLoc, + // Expands assembly pseudo instructions. + // Returns false on success, true otherwise. + bool expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions, bool isLoad, bool isImmOpnd); @@ -183,8 +189,6 @@ class MipsAsmParser : public MCTargetAsmParser { return STI.getFeatureBits() & Mips::FeatureMips64r6; } - bool parseRegister(unsigned &RegNum); - bool eatComma(StringRef ErrorStr); int matchCPURegisterName(StringRef Symbol); @@ -205,7 +209,7 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getGPR(int RegNo); - int getATReg(); + int getATReg(SMLoc Loc); bool processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); @@ -967,7 +971,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } // if load/store if (needsExpansion(Inst)) - expandInstruction(Inst, IDLoc, Instructions); + return expandInstruction(Inst, IDLoc, Instructions); else Instructions.push_back(Inst); @@ -980,17 
+984,26 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { case Mips::LoadImm32Reg: case Mips::LoadAddr32Imm: case Mips::LoadAddr32Reg: + case Mips::LoadImm64Reg: return true; default: return false; } } -void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, +bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { switch (Inst.getOpcode()) { + default: assert(0 && "unimplemented expansion"); + return true; case Mips::LoadImm32Reg: return expandLoadImm(Inst, IDLoc, Instructions); + case Mips::LoadImm64Reg: + if (!isGP64()) { + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + return expandLoadImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Imm: return expandLoadAddressImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Reg: @@ -998,7 +1011,31 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } } -void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, +namespace { +template +void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + if (PerformShift) { + tmpInst.setOpcode(Mips::DSLL); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateImm(16)); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); + tmpInst.clear(); + } + tmpInst.setOpcode(Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand( + MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift))); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} +} + +bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); @@ -1006,8 +1043,10 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, const MCOperand &RegOp = Inst.getOperand(0); 
assert(RegOp.isReg() && "expected register operand kind"); - int ImmValue = ImmOp.getImm(); + int64_t ImmValue = ImmOp.getImm(); tmpInst.setLoc(IDLoc); + // FIXME: gas has a special case for values that are 000...1111, which + // becomes a li -1 and then a dsrl if (0 <= ImmValue && ImmValue <= 65535) { // For 0 <= j <= 65535. // li d,j => ori d,$zero,j @@ -1024,25 +1063,76 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); - } else { - // For any other value of j that is representable as a 32-bit integer. + } else if ((ImmValue & 0xffffffff) == ImmValue) { + // For any value of j that is representable as a 32-bit integer, create + // a sequence of: // li d,j => lui d,hi16(j) // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); - tmpInst.clear(); - tmpInst.setOpcode(Mips::ORi); + createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else if ((ImmValue & (0xffffLL << 48)) == 0) { + if (!isGP64()) { + Error (IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- lo32 ------> + // <------- hi32 ------> + // <- hi16 -> <- lo16 -> + // _________________________________ + // | | | | + // | 16-bytes | 16-bytes | 16-bytes | + // |__________|__________|__________| + // + // For any value of j that is representable as a 48-bit integer, create + // a sequence of: + // li d,j => lui d,hi16(j) + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 32)) >> 32)); + Instructions.push_back(tmpInst); + createShiftOr<16, false>(ImmValue, 
RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else { + if (!isGP64()) { + Error (IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- hi32 ------> <------- lo32 ------> + // <- hi16 -> <- lo16 -> + // ___________________________________________ + // | | | | | + // | 16-bytes | 16-bytes | 16-bytes | 16-bytes | + // |__________|__________|__________|__________| + // + // For any value of j that isn't representable as a 48-bit integer. + // li d,j => lui d,hi16(j) + // ori d,d,lo16(hi32(j)) + // dsll d,d,16 + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); - tmpInst.setLoc(IDLoc); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 48)) >> 48)); Instructions.push_back(tmpInst); + createShiftOr<32, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<16, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); } + return false; } -void +bool MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1083,9 +1173,10 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); Instructions.push_back(tmpInst); } + return false; } -void +bool MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { MCInst tmpInst; @@ -1117,6 +1208,7 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); Instructions.push_back(tmpInst); } + return false; } void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, @@ -1127,8 +1219,6 @@ void 
MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, unsigned ImmOffset, HiOffset, LoOffset; const MCExpr *ExprOffset; unsigned TmpRegNum; - unsigned AtRegNum = getReg( - (isGP64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg()); // 1st operand is either the source or destination register. assert(Inst.getOperand(0).isReg() && "expected register operand kind"); unsigned RegOpNum = Inst.getOperand(0).getReg(); @@ -1148,10 +1238,46 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, ExprOffset = Inst.getOperand(2).getExpr(); // All instructions will have the same location. TempInst.setLoc(IDLoc); - // 1st instruction in expansion is LUi. For load instruction we can use - // the dst register as a temporary if base and dst are different, - // but for stores we must use $at. - TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum; + // These are some of the types of expansions we perform here: + // 1) lw $8, sym => lui $8, %hi(sym) + // lw $8, %lo(sym)($8) + // 2) lw $8, offset($9) => lui $8, %hi(offset) + // add $8, $8, $9 + // lw $8, %lo(offset)($9) + // 3) lw $8, offset($8) => lui $at, %hi(offset) + // add $at, $at, $8 + // lw $8, %lo(offset)($at) + // 4) sw $8, sym => lui $at, %hi(sym) + // sw $8, %lo(sym)($at) + // 5) sw $8, offset($8) => lui $at, %hi(offset) + // add $at, $at, $8 + // sw $8, %lo(offset)($at) + // 6) ldc1 $f0, sym => lui $at, %hi(sym) + // ldc1 $f0, %lo(sym)($at) + // + // For load instructions we can use the destination register as a temporary + // if base and dst are different (examples 1 and 2) and if the base register + // is general purpose otherwise we must use $at (example 6) and error if it's + // not available. For stores we must use $at (examples 4 and 5) because we + // must not clobber the source register setting up the offset. 
+ const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode()); + int16_t RegClassOp0 = Desc.OpInfo[0].RegClass; + unsigned RegClassIDOp0 = + getContext().getRegisterInfo()->getRegClass(RegClassOp0).getID(); + bool IsGPR = (RegClassIDOp0 == Mips::GPR32RegClassID) || + (RegClassIDOp0 == Mips::GPR64RegClassID); + if (isLoad && IsGPR && (BaseRegNum != RegOpNum)) + TmpRegNum = RegOpNum; + else { + int AT = getATReg(IDLoc); + // At this point we need AT to perform the expansions and we exit if it is + // not available. + if (!AT) + return; + TmpRegNum = + getReg((isGP64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT); + } + TempInst.setOpcode(Mips::LUi); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); if (isImmOpnd) @@ -1407,10 +1533,11 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) { return true; } -int MipsAsmParser::getATReg() { +int MipsAsmParser::getATReg(SMLoc Loc) { int AT = Options.getATRegNum(); if (AT == 0) - TokError("Pseudo instruction requires $at, which is not available"); + reportParseError(Loc, + "Pseudo instruction requires $at, which is not available"); return AT; } @@ -1482,6 +1609,7 @@ bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) { case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: + case AsmToken::Tilde: case AsmToken::String: { DEBUG(dbgs() << ".. 
generic integer\n"); OperandMatchResultTy ResTy = ParseImm(Operands); @@ -1906,6 +2034,7 @@ MipsAsmParser::ParseImm(OperandVector &Operands) { case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: + case AsmToken::Tilde: case AsmToken::String: break; } @@ -2342,25 +2471,6 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) { return false; } -bool MipsAsmParser::parseRegister(unsigned &RegNum) { - if (!getLexer().is(AsmToken::Dollar)) - return false; - - Parser.Lex(); - - const AsmToken &Reg = Parser.getTok(); - if (Reg.is(AsmToken::Identifier)) { - RegNum = matchCPURegisterName(Reg.getIdentifier()); - } else if (Reg.is(AsmToken::Integer)) { - RegNum = Reg.getIntVal(); - } else { - return false; - } - - Parser.Lex(); - return true; -} - bool MipsAsmParser::eatComma(StringRef ErrorStr) { if (getLexer().isNot(AsmToken::Comma)) { SMLoc Loc = getLexer().getLoc(); @@ -2400,23 +2510,48 @@ bool MipsAsmParser::parseDirectiveCPSetup() { unsigned Save; bool SaveIsReg = true; - if (!parseRegister(FuncReg)) - return reportParseError("expected register containing function address"); - FuncReg = getGPR(FuncReg); + SmallVector, 1> TmpReg; + OperandMatchResultTy ResTy = ParseAnyRegister(TmpReg); + if (ResTy == MatchOperand_NoMatch) { + reportParseError("expected register containing function address"); + Parser.eatToEndOfStatement(); + return false; + } + + MipsOperand &FuncRegOpnd = static_cast(*TmpReg[0]); + if (!FuncRegOpnd.isGPRAsmReg()) { + reportParseError(FuncRegOpnd.getStartLoc(), "invalid register"); + Parser.eatToEndOfStatement(); + return false; + } + + FuncReg = FuncRegOpnd.getGPR32Reg(); + TmpReg.clear(); if (!eatComma("expected comma parsing directive")) return true; - if (!parseRegister(Save)) { + ResTy = ParseAnyRegister(TmpReg); + if (ResTy == MatchOperand_NoMatch) { const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Integer)) { Save = Tok.getIntVal(); SaveIsReg = false; Parser.Lex(); - } else - return reportParseError("expected save 
register or stack offset"); - } else - Save = getGPR(Save); + } else { + reportParseError("expected save register or stack offset"); + Parser.eatToEndOfStatement(); + return false; + } + } else { + MipsOperand &SaveOpnd = static_cast(*TmpReg[0]); + if (!SaveOpnd.isGPRAsmReg()) { + reportParseError(SaveOpnd.getStartLoc(), "invalid register"); + Parser.eatToEndOfStatement(); + return false; + } + Save = SaveOpnd.getGPR32Reg(); + } if (!eatComma("expected comma parsing directive")) return true; diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 908166f0a7b3..902b87759dc0 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -576,6 +576,8 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, // BLTZC if rs == rt && rt != 0 // BLTC if rs != rt && rs != 0 && rt != 0 + bool HasRs = false; + InsnType Rs = fieldFromInstruction(insn, 21, 5); InsnType Rt = fieldFromInstruction(insn, 16, 5); InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; @@ -586,8 +588,14 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, MI.setOpcode(Mips::BGTZC); else if (Rs == Rt) MI.setOpcode(Mips::BLTZC); - else - return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet. 
+ else { + MI.setOpcode(Mips::BLTC); + HasRs = true; + } + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, Rt))); @@ -627,8 +635,11 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, } else if (Rs == Rt) { MI.setOpcode(Mips::BLTZALC); HasRs = true; - } else - return MCDisassembler::Fail; // BLTUC not implemented yet + } else { + MI.setOpcode(Mips::BLTUC); + HasRs = true; + HasRt = true; + } if (HasRs) MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index bc695e6d4f74..d5c3dbc47880 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -65,7 +65,7 @@ class MipsAsmBackend : public MCAsmBackend { const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { // FIXME. 
- assert(0 && "RelaxInstruction() unimplemented"); + llvm_unreachable("RelaxInstruction() unimplemented"); return false; } diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td index 051db7525f6d..9b7ada68ee26 100644 --- a/lib/Target/Mips/Mips32r6InstrFormats.td +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -95,6 +95,8 @@ def OPCODE6_CLO : OPCODE6<0b010001>; def OPCODE6_CLZ : OPCODE6<0b010000>; def OPCODE6_DCLO : OPCODE6<0b010011>; def OPCODE6_DCLZ : OPCODE6<0b010010>; +def OPCODE6_LSA : OPCODE6<0b000101>; +def OPCODE6_DLSA : OPCODE6<0b010101>; class FIELD_FMT Val> { bits<5> Value = Val; @@ -453,6 +455,23 @@ class SPECIAL3_LL_SC_FM : MipsR6Inst { string DecoderMethod = "DecodeSpecial3LlSc"; } +class SPECIAL_LSA_FM : MipsR6Inst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<2> imm2; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-8} = 0b000; + let Inst{7-6} = imm2; + let Inst{5-0} = Operation.Value; +} + class REGIMM_FM : MipsR6Inst { bits<5> rs; bits<16> imm; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 47dafcd29608..9ec41a20b394 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -77,6 +77,11 @@ class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, class BGTZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, DecodeDisambiguatedBy<"BgtzGroupBranch">; +class BLTC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"BgtzlGroupBranch">; +class BLTUC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"BgtzGroupBranch">; + class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, DecodeDisambiguatedBy<"BlezlGroupBranch">; class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, @@ -155,6 +160,8 @@ class LWC2_R6_ENC : COP2LDST_FM; class SDC2_R6_ENC : COP2LDST_FM; class SWC2_R6_ENC : COP2LDST_FM; +class LSA_R6_ENC : SPECIAL_LSA_FM; + class LL_R6_ENC : 
SPECIAL3_LL_SC_FM; class SC_R6_ENC : SPECIAL3_LL_SC_FM; @@ -329,6 +336,9 @@ class BGEUC_DESC : CMP_BC_DESC_BASE<"bgeuc", brtarget, GPR32Opnd>; class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>; class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>; +class BLTC_DESC : CMP_BC_DESC_BASE<"bltc", brtarget, GPR32Opnd>; +class BLTUC_DESC : CMP_BC_DESC_BASE<"bltuc", brtarget, GPR32Opnd>; + class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>; class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>; @@ -566,6 +576,16 @@ class COP2ST_DESC_BASE { class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>; class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>; +class LSA_R6_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $imm2"); + list Pattern = []; +} + +class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; + class LL_R6_DESC_BASE { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins mem_simm9:$addr); @@ -636,8 +656,8 @@ def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6; def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6; def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6; def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6; -def BLTC; // Also aliased to bgtc with operands swapped -def BLTUC; // Also aliased to bgtuc with operands swapped +def BLTC : BLTC_ENC, BLTC_DESC, ISA_MIPS32R6; +def BLTUC : BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6; def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6; def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6; def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6; @@ -659,7 +679,7 @@ def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6; def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6; def LDC2_R6 : LDC2_R6_ENC, LDC2_R6_DESC, ISA_MIPS32R6; def LL_R6 : LL_R6_ENC, LL_R6_DESC, ISA_MIPS32R6; -// def LSA; // See MSA +def LSA_R6 : LSA_R6_ENC, 
LSA_R6_DESC, ISA_MIPS32R6; def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6; def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6; def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 88422ce19dea..2b9cda7ac83c 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -23,6 +23,8 @@ def uimm16_64 : Operand { // Signed Operand def simm10_64 : Operand; +def imm64: Operand; + // Transformation Function - get Imm - 32. def Subtract32 : SDNodeXFormgetZExtValue() - 32); @@ -486,6 +488,11 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; +class LoadImm64< string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), + !strconcat(instr_asm, "\t$rt, $imm64")> ; +def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>; + /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index dbfb9fbbf9bc..96d111f64b26 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -29,6 +29,7 @@ class DCLO_R6_ENC : SPECIAL_2R_FM; class DCLZ_R6_ENC : SPECIAL_2R_FM; class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>; class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>; +class DLSA_R6_ENC : SPECIAL_LSA_FM; class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>; class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>; class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b111000>; @@ -61,6 +62,7 @@ class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>; class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>; class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>; class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>; +class DLSA_R6_DESC : 
LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>; class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>; class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>; class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>; @@ -88,7 +90,7 @@ def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6; def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6; -// def DLSA; // See MSA +def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6; def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6; def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6; def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 6e8e4980109c..b1b455769477 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1156,7 +1156,7 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>, ISA_MIPS1_NOT_32R6_64R6; } -def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM; +def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32; def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>; def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>; def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>; @@ -1419,6 +1419,8 @@ def : MipsInstAlias<"add $rs, $rt, $imm", (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; def : MipsInstAlias<"and $rs, $rt, $imm", (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; +def : MipsInstAlias<"and $rs, $imm", + (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>; def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>; let Predicates = [NotInMicroMips] in { def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>; @@ -1442,6 +1444,8 @@ def : MipsInstAlias<"xor $rs, $rt, $imm", (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; def : MipsInstAlias<"or $rs, $rt, $imm", (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; +def : MipsInstAlias<"or $rs, $imm", + (ORi 
GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>; def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; @@ -1484,6 +1488,8 @@ def : MipsInstAlias<"sra $rd, $rt, $rs", (SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; def : MipsInstAlias<"srl $rd, $rt, $rs", (SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"sync", + (SYNC 0), 1>, ISA_MIPS2; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// @@ -1536,6 +1542,10 @@ let AdditionalPredicates = [NotDSP] in { (ADDiu GPR32:$src, imm:$imm)>; } +// SYNC +def : MipsPat<(MipsSync (i32 immz)), + (SYNC 0)>, ISA_MIPS2; + // Call def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), (JAL tglobaladdr:$dst)>; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index e6cbf643596c..195b3c0fe9ed 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1438,7 +1438,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, O << "linear"; break; case 2: - assert(0 && "Anisotropic filtering is not supported"); + llvm_unreachable("Anisotropic filtering is not supported"); default: O << "nearest"; break; @@ -1562,7 +1562,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } break; default: - assert(0 && "type not supported yet"); + llvm_unreachable("type not supported yet"); } } @@ -1682,7 +1682,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, O << "]"; break; default: - assert(0 && "type not supported yet"); + llvm_unreachable("type not supported yet"); } return; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp 
b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index a4983ad67235..435a93f78c1d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -102,17 +102,45 @@ class PPCMCCodeEmitter : public MCCodeEmitter { // Output the constant in big/little endian byte order. unsigned Size = Desc.getSize(); - if (IsLittleEndian) { - for (unsigned i = 0; i != Size; ++i) { - OS << (char)Bits; - Bits >>= 8; + switch (Size) { + case 4: + if (IsLittleEndian) { + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); } - } else { - int ShiftValue = (Size * 8) - 8; - for (unsigned i = 0; i != Size; ++i) { - OS << (char)(Bits >> ShiftValue); - Bits <<= 8; + break; + case 8: + // If we emit a pair of instructions, the first one is + // always in the top 32 bits, even on little-endian. + if (IsLittleEndian) { + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 56); + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 56); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); } + break; + default: + llvm_unreachable ("Invalid instruction size"); } ++MCNumEmitted; // Keep track of the # of mi's emitted. diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 1280dec86439..ca1ca56d08fa 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -74,6 +74,12 @@ class PPCFrameLowering: public TargetFrameLowering { return isPPC64 ? 
16 : 4; } + /// getTOCSaveOffset - Return the previous frame offset to save the + /// TOC register -- 64-bit SVR4 ABI only. + static unsigned getTOCSaveOffset(void) { + return 40; + } + /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1db9070267a0..f6884d5a2716 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -773,7 +773,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; - case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE"; case PPCISD::LOAD: return "PPCISD::LOAD"; case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; @@ -3431,11 +3430,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; - if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - needIndirectCall = false; - } + if (!isSVR4ABI || !isPPC64) + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { + // If this is an absolute destination address, use the munged value. + Callee = SDValue(Dest, 0); + needIndirectCall = false; + } if (GlobalAddressSDNode *G = dyn_cast(Callee)) { // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 @@ -3543,8 +3543,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // additional register being allocated and an unnecessary move instruction // being generated. 
VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue TOCOff = DAG.getIntPtrConstant(8); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, - Callee, InFlag); + AddTOC, InFlag); Chain = LoadTOCPtr.getValue(0); InFlag = LoadTOCPtr.getValue(1); @@ -3728,7 +3730,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, if (needsTOCRestore) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag); InFlag = Chain.getValue(1); } @@ -4383,19 +4390,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // pointers in the 64-bit SVR4 ABI. if (!isTailCall && !dyn_cast(Callee) && - !dyn_cast(Callee) && - !isBLACompatibleAddress(Callee, DAG)) { + !dyn_cast(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. - SDValue PtrOff = DAG.getIntPtrConstant(40); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); - // R12 must contain the address of an indirect callee. This does not - // mean the MTCTR instruction must use R12; it's easier to model this - // as an extra parameter, so do that. 
- RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 9b85c99726e1..2b69208fea7e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -70,19 +70,14 @@ namespace llvm { TOC_ENTRY, - /// The following three target-specific nodes are used for calls through + /// The following two target-specific nodes are used for calls through /// function pointers in the 64-bit SVR4 ABI. - /// Restore the TOC from the TOC save area of the current stack frame. - /// This is basically a hard coded load instruction which additionally - /// takes/produces a flag. - TOC_RESTORE, - /// Like a regular LOAD but additionally taking/producing a flag. LOAD, - /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is - /// a hard coded load instruction. + /// Like LOAD (taking/producing a flag), but using r2 as hard-coded + /// destination. 
LOAD_TOC, /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index b71c09ea8e12..9318f70f4305 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -802,17 +802,11 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1, isCodeGenOnly = 1 in { -let RST = 2, DS = 2 in -def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), - "ld 2, 8($reg)", IIC_LdStLD, - [(PPCload_toc i64:$reg)]>, isPPC64; - -let RST = 2, DS = 10, RA = 1 in -def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), - "ld 2, 40(1)", IIC_LdStLD, - [(PPCtoc_restore)]>, isPPC64; -} +let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in +def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src), + "ld 2, $src", IIC_LdStLD, + [(PPCload_toc ixaddr:$src)]>, isPPC64; + def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", IIC_LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 7fed2c65da73..1e4396cd1017 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -360,20 +360,6 @@ class DSForm_1 opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, let Inst{30-31} = xo; } -class DSForm_1a opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : I { - bits<5> RST; - bits<14> DS; - bits<5> RA; - - let Pattern = pattern; - - let Inst{6-10} = RST; - let Inst{11-15} = RA; - let Inst{16-29} = DS; - let Inst{30-31} = xo; -} // 1.7.6 X-Form class XForm_base_r3xo opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e421f8e69e65..c2e3382b3e79 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ 
b/lib/Target/PowerPC/PPCInstrInfo.td @@ -141,9 +141,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPSideEffect, SDNPInGlue, SDNPOutGlue]>; -def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPSideEffect, - SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 3fd8e2f9248d..f92bde853770 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -17,6 +17,7 @@ namespace llvm { class AMDGPUInstrPrinter; +class AMDGPUSubtarget; class AMDGPUTargetMachine; class FunctionPass; class MCAsmInfo; @@ -47,6 +48,7 @@ void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; // Passes common to R600 and SI +FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST); Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 2cfc46393795..6ff9ab7ab7d2 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -32,30 +32,25 @@ def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", "false", "Disable the if conversion pass">; -def FeatureFP64 : SubtargetFeature<"fp64", +def FeatureFP64 : SubtargetFeature<"fp64", "FP64", "true", - "Enable 64bit double precision operations">; + "Enable double precision operations">; def Feature64BitPtr : SubtargetFeature<"64BitPtr", "Is64bit", "true", - "Specify if 64bit addressing should be used.">; - -def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", - "Is32on64bit", - "false", - "Specify if 64bit sized pointers with 32bit addressing should be used.">; + "Specify if 64-bit addressing should be used">; def FeatureR600ALUInst : 
SubtargetFeature<"R600ALUInst", "R600ALUInst", "false", - "Older version of ALU instructions encoding.">; + "Older version of ALU instructions encoding">; def FeatureVertexCache : SubtargetFeature<"HasVertexCache", "HasVertexCache", "true", - "Specify use of dedicated vertex cache.">; + "Specify use of dedicated vertex cache">; def FeatureCaymanISA : SubtargetFeature<"caymanISA", "CaymanISA", @@ -86,28 +81,40 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; +class SubtargetFeatureLocalMemorySize : SubtargetFeature< + "localmemorysize"#Value, + "LocalMemorySize", + !cast(Value), + "The size of local memory in bytes">; + class SubtargetFeatureGeneration Implies> : SubtargetFeature ; +def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; +def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; +def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; + def FeatureR600 : SubtargetFeatureGeneration<"R600", - [FeatureR600ALUInst, FeatureFetchLimit8]>; + [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>; def FeatureR700 : SubtargetFeatureGeneration<"R700", - [FeatureFetchLimit16]>; + [FeatureFetchLimit16, FeatureLocalMemorySize0]>; def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", - [FeatureFetchLimit16]>; + [FeatureFetchLimit16, FeatureLocalMemorySize32768]>; def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", - [FeatureFetchLimit16, FeatureWavefrontSize64]>; + [FeatureFetchLimit16, FeatureWavefrontSize64, + FeatureLocalMemorySize32768] +>; def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", - [Feature64BitPtr, FeatureFP64]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768]>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", - [Feature64BitPtr, FeatureFP64]>; + 
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536]>; //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 8385baa1011b..b4e86ce3a1a3 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -258,6 +258,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } case ISD::SCALAR_TO_VECTOR: + case AMDGPUISD::BUILD_VERTICAL_VECTOR: case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPURegisterInfo *TRI = @@ -308,7 +309,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { // can't be bundled by our scheduler. switch(NumVectorElts) { case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; - case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; + case 4: + if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) + RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; + else + RegClassID = AMDGPU::R600_Reg128RegClassID; + break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 15ac2a2379d3..1aa92fadbeea 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -218,6 +218,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::BR_CC, MVT::i1, Expand); + if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { + setOperationAction(ISD::FCEIL, MVT::f64, Custom); + setOperationAction(ISD::FTRUNC, MVT::f64, Custom); + setOperationAction(ISD::FRINT, MVT::f64, Custom); + setOperationAction(ISD::FFLOOR, MVT::f64, Custom); + } + if (!Subtarget->hasBFI()) { // fcopysign can be done in a single instruction with BFI. 
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); @@ -253,6 +260,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i64, Expand); setOperationAction(ISD::MULHS, MVT::i64, Expand); @@ -297,7 +305,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); } static const MVT::SimpleValueType FloatVectorTypes[] = { @@ -307,6 +317,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : for (MVT VT : FloatVectorTypes) { setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FADD, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); @@ -314,6 +325,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); @@ -323,6 +335,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FCOPYSIGN, VT, Expand); } + setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT_CC); @@ -487,6 +502,11 @@ SDValue 
AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::SREM: return LowerSREM(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); + case ISD::FCEIL: return LowerFCEIL(Op, DAG); + case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); + case ISD::FRINT: return LowerFRINT(Op, DAG); + case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); + case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); // AMDIL DAG lowering. @@ -822,6 +842,28 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::AMDGPU_div_scale: + return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::AMDGPU_div_fmas: + return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_div_fixup: + return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_trig_preop: + return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::AMDGPU_rcp: + return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); + + case Intrinsic::AMDGPU_rsq: + return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_imax: return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -886,6 +928,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_brev: + return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_exp: // Legacy name. 
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); @@ -1563,6 +1608,139 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + // result = trunc(src) + // if (src > 0.0 && src != result) + // result += 1.0 + + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); + + const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64); + const SDValue One = DAG.getConstantFP(1.0, MVT::f64); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + + SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); + SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); + SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); + + SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); + return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); +} + +SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + assert(Op.getValueType() == MVT::f64); + + const SDValue Zero = DAG.getConstant(0, MVT::i32); + const SDValue One = DAG.getConstant(1, MVT::i32); + + SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); + + // Extract the upper half, since this is where we will find the sign and + // exponent. + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One); + + const unsigned FractBits = 52; + const unsigned ExpBits = 11; + + // Extract the exponent. + SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32, + Hi, + DAG.getConstant(FractBits - 32, MVT::i32), + DAG.getConstant(ExpBits, MVT::i32)); + SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart, + DAG.getConstant(1023, MVT::i32)); + + // Extract the sign bit. 
+ const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32); + SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); + + // Extend back to to 64-bits. + SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, + Zero, SignBit); + SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); + + SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); + const SDValue FractMask + = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64); + + SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); + SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); + SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32); + + const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32); + + SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT); + SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT); + + SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0); + SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1); + + return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2); +} + +SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + assert(Op.getValueType() == MVT::f64); + + APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52"); + SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64); + SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); + + SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); + SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); + + SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); + + APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51"); + SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); + + 
return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); +} + +SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const { + // FNEARBYINT and FRINT are the same, except in their handling of FP + // exceptions. Those aren't really meaningful for us, and OpenCL only has + // rint, so just treat them as equivalent. + return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); +} + +SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + // result = trunc(src); + // if (src < 0.0 && src != result) + // result += -1.0. + + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); + + const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64); + const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + + SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); + SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); + SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); + + SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); + return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); +} + SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue S0 = Op.getOperand(0); @@ -1886,16 +2064,23 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FMIN) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(URECIP) + NODE_NAME_CASE(DIV_SCALE) + NODE_NAME_CASE(DIV_FMAS) + NODE_NAME_CASE(DIV_FIXUP) + NODE_NAME_CASE(TRIG_PREOP) + NODE_NAME_CASE(RCP) + NODE_NAME_CASE(RSQ) + NODE_NAME_CASE(DOT4) NODE_NAME_CASE(BFE_U32) NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(BREV) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) NODE_NAME_CASE(MAD_I24) - NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(DOT4) 
NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) @@ -1910,6 +2095,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CVT_F32_UBYTE1) NODE_NAME_CASE(CVT_F32_UBYTE2) NODE_NAME_CASE(CVT_F32_UBYTE3) + NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 57f09bebd11d..e2000a04ba4e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -51,6 +51,12 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue ExpandSIGN_EXTEND_INREG(SDValue Op, @@ -169,6 +175,9 @@ enum { DWORDADDR, FRACT, CLAMP, + + // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. + // Denormals handled on some parts. COS_HW, SIN_HW, FMAX, @@ -178,11 +187,21 @@ enum { SMIN, UMIN, URECIP, + DIV_SCALE, + DIV_FMAS, + DIV_FIXUP, + TRIG_PREOP, // 1 ULP max error for f64 + + // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. + // For f64, max error 2^29 ULP, handles denormals. + RCP, + RSQ, DOT4, BFE_U32, // Extract range of bits with zero extension to 32-bits. BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. + BREV, // Reverse bits. 
MUL_U24, MUL_I24, MAD_U24, @@ -203,6 +222,15 @@ enum { CVT_F32_UBYTE1, CVT_F32_UBYTE2, CVT_F32_UBYTE3, + /// This node is for VLIW targets and it is used to represent a vector + /// that is stored in consecutive registers with the same channel. + /// For example: + /// |X |Y|Z|W| + /// T0|v.x| | | | + /// T1|v.y| | | | + /// T2|v.z| | | | + /// T3|v.w| | | | + BUILD_VERTICAL_VECTOR, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 5cb2559dbb83..d0ee40a67872 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -19,6 +19,14 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; +def AMDGPUTrigPreOp : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] +>; + +def AMDGPUDivScaleOp : SDTypeProfile<2, 3, + [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -29,6 +37,12 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; // out = a - floor(a) def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; +// out = 1.0 / a +def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; + +// out = 1.0 / sqrt(a) +def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; + // out = max(a, b) a and b are floats def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, [SDNPCommutative, SDNPAssociative] @@ -78,6 +92,21 @@ def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", // e is rounding error def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; +// Special case divide preop and flags. 
+def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>; + +// Special case divide FMA with scale and flags (src0 = Quotient, +// src1 = Denominator, src2 = Numerator). +def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>; + +// Single or double precision division fixup. +// Special case divide fixup and flags(src0 = Quotient, src1 = +// Denominator, src2 = Numerator). +def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>; + +// Look Up 2.0 / pi src0 with segment select src1[4:0] +def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>; + def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD", SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayLoad]>; @@ -105,6 +134,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; +def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>; + // Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when // performing the mulitply. The result is a 32-bit value. 
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 8bfc11cd468c..14bfd8cc18d5 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -519,6 +519,16 @@ multiclass Expand24UBitOps { >; } +class RcpPat : Pat < + (fdiv FP_ONE, vt:$src), + (RcpInst $src) +>; + +class RsqPat : Pat < + (AMDGPUrcp (fsqrt vt:$src)), + (RsqInst $src) +>; + include "R600Instructions.td" include "R700Instructions.td" include "EvergreenInstructions.td" diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 95fab11a260f..27c0dbea09f4 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -30,8 +30,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; @@ -64,10 +62,12 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_brev : 
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; + def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } -// Legacy names for compatability. +// Legacy names for compatibility. let TargetPrefix = "AMDIL", isTarget = 1 in { def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp new file mode 100644 index 000000000000..2d3a7fdd4feb --- /dev/null +++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp @@ -0,0 +1,365 @@ +//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates allocas by either converting them into vectors or +// by migrating them to local address space. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "amdgpu-promote-alloca" + +using namespace llvm; + +namespace { + +class AMDGPUPromoteAlloca : public FunctionPass, + public InstVisitor { + + static char ID; + Module *Mod; + const AMDGPUSubtarget &ST; + int LocalMemAvailable; + +public: + AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st), + LocalMemAvailable(0) { } + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + virtual const char *getPassName() const { + return "AMDGPU Promote Alloca"; + } + void visitAlloca(AllocaInst &I); +}; + +} // End anonymous namespace + +char AMDGPUPromoteAlloca::ID = 0; + +bool AMDGPUPromoteAlloca::doInitialization(Module &M) { + Mod = &M; + return false; +} + +bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { + + const FunctionType *FTy = F.getFunctionType(); + + LocalMemAvailable = ST.getLocalMemorySize(); + + + // If the function has any arguments in the local address space, then it's + // possible these arguments require the entire local memory space, so + // we cannot use local memory in the pass. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { + const Type *ParamTy = FTy->getParamType(i); + if (ParamTy->isPointerTy() && + ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + LocalMemAvailable = 0; + DEBUG(dbgs() << "Function has local memory argument. 
Promoting to " + "local memory disabled.\n"); + break; + } + } + + if (LocalMemAvailable > 0) { + // Check how much local memory is being used by global objects + for (Module::global_iterator I = Mod->global_begin(), + E = Mod->global_end(); I != E; ++I) { + GlobalVariable *GV = I; + PointerType *GVTy = GV->getType(); + if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + continue; + for (Value::use_iterator U = GV->use_begin(), + UE = GV->use_end(); U != UE; ++U) { + Instruction *Use = dyn_cast(*U); + if (!Use) + continue; + if (Use->getParent()->getParent() == &F) + LocalMemAvailable -= + Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType()); + } + } + } + + LocalMemAvailable = std::max(0, LocalMemAvailable); + DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n"); + + visit(F); + + return false; +} + +static VectorType *arrayTypeToVecType(const Type *ArrayTy) { + return VectorType::get(ArrayTy->getArrayElementType(), + ArrayTy->getArrayNumElements()); +} + +static Value* calculateVectorIndex(Value *Ptr, + std::map GEPIdx) { + if (isa(Ptr)) + return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext())); + + GetElementPtrInst *GEP = cast(Ptr); + + return GEPIdx[GEP]; +} + +static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { + // FIXME we only support simple cases + if (GEP->getNumOperands() != 3) + return NULL; + + ConstantInt *I0 = dyn_cast(GEP->getOperand(1)); + if (!I0 || !I0->isZero()) + return NULL; + + return GEP->getOperand(2); +} + +static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { + Type *AllocaTy = Alloca->getAllocatedType(); + + DEBUG(dbgs() << "Alloca Candidate for vectorization \n"); + + // FIXME: There is no reason why we can't support larger arrays, we + // are just being conservative for now. 
+ if (!AllocaTy->isArrayTy() || + AllocaTy->getArrayElementType()->isVectorTy() || + AllocaTy->getArrayNumElements() > 4) { + + DEBUG(dbgs() << " Cannot convert type to vector"); + return false; + } + + std::map GEPVectorIdx; + std::vector WorkList; + for (User *AllocaUser : Alloca->users()) { + GetElementPtrInst *GEP = dyn_cast(AllocaUser); + if (!GEP) { + WorkList.push_back(AllocaUser); + continue; + } + + Value *Index = GEPToVectorIndex(GEP); + + // If we can't compute a vector index from this GEP, then we can't + // promote this alloca to vector. + if (!Index) { + DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << "\n"); + return false; + } + + GEPVectorIdx[GEP] = Index; + for (User *GEPUser : AllocaUser->users()) { + WorkList.push_back(GEPUser); + } + } + + VectorType *VectorTy = arrayTypeToVecType(AllocaTy); + + DEBUG(dbgs() << " Converting alloca to vector "; AllocaTy->dump(); + dbgs() << " -> "; VectorTy->dump(); dbgs() << "\n"); + + for (std::vector::iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { + Instruction *Inst = cast(*I); + IRBuilder<> Builder(Inst); + switch (Inst->getOpcode()) { + case Instruction::Load: { + Value *Ptr = Inst->getOperand(0); + Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0)); + Value *VecValue = Builder.CreateLoad(BitCast); + Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); + Inst->replaceAllUsesWith(ExtractElement); + Inst->eraseFromParent(); + break; + } + case Instruction::Store: { + Value *Ptr = Inst->getOperand(1); + Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0)); + Value *VecValue = Builder.CreateLoad(BitCast); + Value *NewVecValue = Builder.CreateInsertElement(VecValue, + Inst->getOperand(0), + Index); + Builder.CreateStore(NewVecValue, BitCast); + Inst->eraseFromParent(); + break; + } + case 
Instruction::BitCast: + break; + + default: + Inst->dump(); + llvm_unreachable("Do not know how to replace this instruction " + "with vector op"); + } + } + return true; +} + +static void collectUsesWithPtrTypes(Value *Val, std::vector &WorkList) { + for (User *User : Val->users()) { + if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end()) + continue; + if (isa(User)) { + WorkList.push_back(User); + continue; + } + if (!User->getType()->isPointerTy()) + continue; + WorkList.push_back(User); + collectUsesWithPtrTypes(User, WorkList); + } +} + +void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { + IRBuilder<> Builder(&I); + + // First try to replace the alloca with a vector + Type *AllocaTy = I.getAllocatedType(); + + DEBUG(dbgs() << "Trying to promote " << I); + + if (tryPromoteAllocaToVector(&I)) + return; + + DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); + + // FIXME: This is the maximum work group size. We should try to get + // value from the reqd_work_group_size function attribute if it is + // available. 
+ unsigned WorkGroupSize = 256; + int AllocaSize = WorkGroupSize * + Mod->getDataLayout()->getTypeAllocSize(AllocaTy); + + if (AllocaSize > LocalMemAvailable) { + DEBUG(dbgs() << " Not enough local memory to promote alloca.\n"); + return; + } + + DEBUG(dbgs() << "Promoting alloca to local memory\n"); + LocalMemAvailable -= AllocaSize; + + GlobalVariable *GV = new GlobalVariable( + *Mod, ArrayType::get(I.getAllocatedType(), 256), false, + GlobalValue::ExternalLinkage, 0, I.getName(), 0, + GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS); + + FunctionType *FTy = FunctionType::get( + Type::getInt32Ty(Mod->getContext()), false); + AttributeSet AttrSet; + AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone); + + Value *ReadLocalSizeY = Mod->getOrInsertFunction( + "llvm.r600.read.local.size.y", FTy, AttrSet); + Value *ReadLocalSizeZ = Mod->getOrInsertFunction( + "llvm.r600.read.local.size.z", FTy, AttrSet); + Value *ReadTIDIGX = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.x", FTy, AttrSet); + Value *ReadTIDIGY = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.y", FTy, AttrSet); + Value *ReadTIDIGZ = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.z", FTy, AttrSet); + + + Value *TCntY = Builder.CreateCall(ReadLocalSizeY); + Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ); + Value *TIdX = Builder.CreateCall(ReadTIDIGX); + Value *TIdY = Builder.CreateCall(ReadTIDIGY); + Value *TIdZ = Builder.CreateCall(ReadTIDIGZ); + + Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ); + Tmp0 = Builder.CreateMul(Tmp0, TIdX); + Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ); + Value *TID = Builder.CreateAdd(Tmp0, Tmp1); + TID = Builder.CreateAdd(TID, TIdZ); + + std::vector Indices; + Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext()))); + Indices.push_back(TID); + + Value *Offset = Builder.CreateGEP(GV, Indices); + I.mutateType(Offset->getType()); + I.replaceAllUsesWith(Offset); + I.eraseFromParent(); + + std::vector WorkList; + + 
collectUsesWithPtrTypes(Offset, WorkList); + + for (std::vector::iterator i = WorkList.begin(), + e = WorkList.end(); i != e; ++i) { + Value *V = *i; + CallInst *Call = dyn_cast(V); + if (!Call) { + Type *EltTy = V->getType()->getPointerElementType(); + PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + V->mutateType(NewTy); + continue; + } + + IntrinsicInst *Intr = dyn_cast(Call); + if (!Intr) { + std::vector ArgTypes; + for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands(); + ArgIdx != ArgEnd; ++ArgIdx) { + ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType()); + } + Function *F = Call->getCalledFunction(); + FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes, + F->isVarArg()); + Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType, + F->getAttributes()); + Function *NewF = cast(C); + Call->setCalledFunction(NewF); + continue; + } + + Builder.SetInsertPoint(Intr); + switch (Intr->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // These intrinsics are for address space 0 only + Intr->eraseFromParent(); + continue; + case Intrinsic::memcpy: { + MemCpyInst *MemCpy = cast(Intr); + Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(), + MemCpy->getLength(), MemCpy->getAlignment(), + MemCpy->isVolatile()); + Intr->eraseFromParent(); + continue; + } + case Intrinsic::memset: { + MemSetInst *MemSet = cast(Intr); + Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), + MemSet->getLength(), MemSet->getAlignment(), + MemSet->isVolatile()); + Intr->eraseFromParent(); + continue; + } + default: + Intr->dump(); + llvm_unreachable("Don't know how to promote alloca intrinsic use."); + } + } +} + +FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) { + return new AMDGPUPromoteAlloca(ST); +} diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 348d50f93f42..4fd43905b210 
100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -41,6 +41,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : EnableIfCvt = true; WavefrontSize = 0; CFALUBug = false; + LocalMemorySize = 0; ParseSubtargetFeatures(GPU, FS); DevName = GPU; @@ -109,6 +110,10 @@ AMDGPUSubtarget::hasCFAluBug() const { assert(getGeneration() <= NORTHERN_ISLANDS); return CFALUBug; } +int +AMDGPUSubtarget::getLocalMemorySize() const { + return LocalMemorySize; +} bool AMDGPUSubtarget::isTargetELF() const { return false; diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 411aaf9711e7..bbbd997f6af8 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -44,7 +44,6 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { private: std::string DevName; bool Is64bit; - bool Is32on64bit; bool DumpCode; bool R600ALUInst; bool HasVertexCache; @@ -56,6 +55,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { bool EnableIfCvt; unsigned WavefrontSize; bool CFALUBug; + int LocalMemorySize; InstrItineraryData InstrItins; @@ -109,6 +109,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { unsigned getWavefrontSize() const; unsigned getStackEntrySize() const; bool hasCFAluBug() const; + int getLocalMemorySize() const; bool enableMachineScheduler() const override { return getGeneration() <= NORTHERN_ISLANDS; diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 3c896af46a6c..be1eceaaa00d 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -109,6 +109,7 @@ class AMDGPUPassConfig : public TargetPassConfig { return nullptr; } + virtual void addCodeGenPrepare(); bool addPreISel() override; bool addInstSelector() override; bool addPreRegAlloc() override; @@ -134,6 +135,13 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) { 
PM.add(createAMDGPUTargetTransformInfoPass(this)); } +void AMDGPUPassConfig::addCodeGenPrepare() { + const AMDGPUSubtarget &ST = TM->getSubtarget(); + addPass(createAMDGPUPromoteAlloca(ST)); + addPass(createSROAPass()); + TargetPassConfig::addCodeGenPrepare(); +} + bool AMDGPUPassConfig::addPreISel() { const AMDGPUSubtarget &ST = TM->getSubtarget(); diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 128dd683fb25..be2ca06ef34d 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_target(R600CodeGen AMDGPUTargetTransformInfo.cpp AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp + AMDGPUPromoteAlloca.cpp AMDGPURegisterInfo.cpp R600ClauseMergePass.cpp R600ControlFlowFinalizer.cpp diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 28725ff280da..dcb7e982c7fc 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -348,7 +348,7 @@ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; def GROUP_BARRIER : InstR600 < - (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>, + (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>, R600ALU_Word0, R600ALU_Word1_OP2 <0x54> { @@ -377,6 +377,11 @@ def GROUP_BARRIER : InstR600 < let ALUInst = 1; } +def : Pat < + (int_AMDGPU_barrier_global), + (GROUP_BARRIER) +>; + //===----------------------------------------------------------------------===// // LDS Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 5e7cefed0ace..dc1344fb8d3f 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -172,17 +172,13 @@ uint64_t 
R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, SmallVectorImpl &Fixup, const MCSubtargetInfo &STI) const { if (MO.isReg()) { - if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) { + if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) return MRI.getEncodingValue(MO.getReg()); - } else { - return getHWReg(MO.getReg()); - } - } else if (MO.isImm()) { - return MO.getImm(); - } else { - assert(0); - return 0; + return getHWReg(MO.getReg()); } + + assert(MO.isImm()); + return MO.getImm(); } #include "AMDGPUGenMCCodeEmitter.inc" diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 87238d6156b9..f0e13e56d8ff 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -136,6 +136,16 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); @@ -147,6 +157,12 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIV, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); + // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 + // to be Legal/Custom in order to avoid library calls. 
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -540,6 +556,11 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const R600MachineFunctionInfo *MFI = MF.getInfo(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -812,6 +833,56 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, } } +SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, + SDValue Vector) const { + + SDLoc DL(Vector); + EVT VecVT = Vector.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + SmallVector Args; + + for (unsigned i = 0, e = VecVT.getVectorNumElements(); + i != e; ++i) { + Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, + Vector, DAG.getConstant(i, getVectorIdxTy()))); + } + + return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); +} + +SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Index = Op.getOperand(1); + + if (isa(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Index); +} + +SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc 
DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + + if (isa(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Value, Index); + return vectorToVerticalVector(DAG, Insert); +} + SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { // On hw >= R700, COS/SIN input must be between -1. and 1. // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) @@ -843,6 +914,80 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { DAG.getConstantFP(3.14159265359, MVT::f32)); } +SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shift = Op.getOperand(2); + SDValue Zero = DAG.getConstant(0, VT); + SDValue One = DAG.getConstant(1, VT); + + SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT); + SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); + SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); + SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); + + // The dance around Width1 is necessary for 0 special case. + // Without it the CompShift might be 32, producing incorrect results in + // Overflow. So we do the shift in two steps, the alternative is to + // add a conditional to filter the special case. 
+ + SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift); + Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One); + + SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift); + HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow); + SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift); + + SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift); + SDValue LoBig = Zero; + + Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); + Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); +} + +SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shift = Op.getOperand(2); + SDValue Zero = DAG.getConstant(0, VT); + SDValue One = DAG.getConstant(1, VT); + + const bool SRA = Op.getOpcode() == ISD::SRA_PARTS; + + SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT); + SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); + SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); + SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); + + // The dance around Width1 is necessary for 0 special case. + // Without it the CompShift might be 32, producing incorrect results in + // Overflow. So we do the shift in two steps, the alternative is to + // add a conditional to filter the special case. + + SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); + Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); + + SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift); + SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); + LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); + + SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift); + SDValue HiBig = SRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero; + + Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); + Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::SETCC, diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 301509fc8cf0..381642aa600c 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -51,12 +51,17 @@ class R600TargetLowering : public AMDGPUTargetLowering { void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned dword_offset) const; SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const; + SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 080ff6eea75d..3972e2f03730 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -51,11 +51,15 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { unsigned VectorComponents = 0; - if 
(AMDGPU::R600_Reg128RegClass.contains(DestReg) && - AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { + if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || + AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && + (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || + AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) { VectorComponents = 4; - } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) && - AMDGPU::R600_Reg64RegClass.contains(SrcReg)) { + } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) || + AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && + (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || + AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) { VectorComponents = 2; } @@ -1053,6 +1057,29 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return 2; } +bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + + switch(MI->getOpcode()) { + default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); + case AMDGPU::R600_EXTRACT_ELT_V2: + case AMDGPU::R600_EXTRACT_ELT_V4: + buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(), + RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address + MI->getOperand(2).getReg(), + RI.getHWRegChan(MI->getOperand(1).getReg())); + break; + case AMDGPU::R600_INSERT_ELT_V2: + case AMDGPU::R600_INSERT_ELT_V4: + buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value + RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address + MI->getOperand(3).getReg(), // Offset + RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel + break; + } + MI->eraseFromParent(); + return true; +} + void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const { const AMDGPUFrameLowering *TFL = @@ -1090,7 +1117,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - unsigned AddrReg = 
AMDGPU::R600_AddrRegClass.getRegister(Address); + return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0); +} + +MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const { + unsigned AddrReg; + switch (AddrChan) { + default: llvm_unreachable("Invalid Channel"); + case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); setImmOperand(MOVA, AMDGPU::OpName::write, 0); @@ -1107,7 +1149,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); + return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0); +} + +MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const { + unsigned AddrReg; + switch (AddrChan) { + default: llvm_unreachable("Invalid Channel"); + case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 
60df21571b35..45a57d367b8b 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -36,6 +36,18 @@ namespace llvm { std::vector > ExtractSrcs(MachineInstr *MI, const DenseMap &PV, unsigned &ConstCount) const; + + MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const; + + MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const; public: enum BankSwizzle { ALU_VEC_012_SCL_210 = 0, @@ -195,6 +207,8 @@ namespace llvm { int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const override { return 1;} + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + /// \brief Reserve the registers that may be accesed using indirect addressing. void reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index ec039c50dad7..47b7da0955d8 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1083,7 +1083,7 @@ class RECIP_UINT_Common inst> : R600_1OP_Helper < } class RECIPSQRT_CLAMPED_Common inst> : R600_1OP_Helper < - inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq + inst, "RECIPSQRT_CLAMPED", AMDGPUrsq > { let Itinerary = TransALU; } @@ -1581,6 +1581,60 @@ let isTerminator=1 in { defm CONTINUEC : BranchInstr2<"CONTINUEC">; } +//===----------------------------------------------------------------------===// +// Indirect addressing pseudo instructions +//===----------------------------------------------------------------------===// + +let isPseudo = 1 in { + +class ExtractVertical : InstR600 < + (outs R600_Reg32:$dst), + (ins vec_rc:$vec, R600_Reg32:$index), "", + [], + AnyALU +>; + +let Constraints = "$dst = $vec" in { + 
+class InsertVertical : InstR600 < + (outs vec_rc:$dst), + (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "", + [], + AnyALU +>; + +} // End Constraints = "$dst = $vec" + +} // End isPseudo = 1 + +def R600_EXTRACT_ELT_V2 : ExtractVertical ; +def R600_EXTRACT_ELT_V4 : ExtractVertical ; + +def R600_INSERT_ELT_V2 : InsertVertical ; +def R600_INSERT_ELT_V4 : InsertVertical ; + +class ExtractVerticalPat : Pat < + (scalar_ty (extractelt vec_ty:$vec, i32:$index)), + (inst $vec, $index) +>; + +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; + +class InsertVerticalPat : Pat < + (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)), + (inst $vec, $value, $index) +>; + +def : InsertVerticalPat ; +def : InsertVerticalPat ; +def : InsertVerticalPat ; +def : InsertVerticalPat ; + //===----------------------------------------------------------------------===// // ISel Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 68bcd207b42c..cc667d985a82 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -18,18 +18,28 @@ class R600RegWithChan sel, string chan> : class R600Reg_128 subregs, bits<16> encoding> : RegisterWithSubRegs { + field bits<2> chan_encoding = 0; let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1, sub2, sub3]; - let HWEncoding = encoding; + let HWEncoding{8-0} = encoding{8-0}; + let HWEncoding{10-9} = chan_encoding; } class R600Reg_64 subregs, bits<16> encoding> : RegisterWithSubRegs { + field bits<2> chan_encoding = 0; let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; let HWEncoding = encoding; + let HWEncoding{8-0} = encoding{8-0}; + let HWEncoding{10-9} = chan_encoding; } +class R600Reg_64Vertical : R600Reg_64 < + "V"#lo#hi#"_"#chan, + [!cast("T"#lo#"_"#chan), !cast("T"#hi#"_"#chan)], + lo +>; 
foreach Index = 0-127 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { @@ -54,6 +64,24 @@ foreach Index = 0-127 in { Index>; } +foreach Chan = [ "X", "Y", "Z", "W"] in { + + let chan_encoding = !if(!eq(Chan, "X"), 0, + !if(!eq(Chan, "Y"), 1, + !if(!eq(Chan, "Z"), 2, + !if(!eq(Chan, "W"), 3, 0)))) in { + def V0123_#Chan : R600Reg_128 <"V0123_"#Chan, + [!cast("T0_"#Chan), + !cast("T1_"#Chan), + !cast("T2_"#Chan), + !cast("T3_"#Chan)], + 0>; + def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>; + def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>; + } +} + + // KCACHE_BANK0 foreach Index = 159-128 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { @@ -130,8 +158,14 @@ def ALU_PARAM : R600Reg<"Param", 0>; let isAllocatable = 0 in { -// XXX: Only use the X channel, until we support wider stack widths -def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>; +def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>; + +// We only use Addr_[YZW] for vertical vectors. +// FIXME if we add more vertical vector registers we will need to ad more +// registers to these classes. 
+def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>; +def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>; +def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>; def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32, (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>; @@ -206,5 +240,13 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, let CopyCost = -1; } +def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, + (add V0123_W, V0123_Z, V0123_Y, V0123_X) +>; + def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, (add (sequence "T%u_XY", 0, 63))>; + +def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, + (add V01_X, V01_Y, V01_Z, V01_W, + V23_X, V23_Y, V23_Z, V23_W)>; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 846aeb63093a..4e61d5b03aa6 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -212,6 +212,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::FRINT, MVT::f64, Legal); } + // FIXME: This should be removed and handled the same was as f32 fneg. Source + // modifiers also work for the double instructions. 
+ setOperationAction(ISD::FNEG, MVT::f64, Expand); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); @@ -481,19 +485,20 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } - case AMDGPU::V_SUB_F64: - BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), - MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addReg(MI->getOperand(2).getReg()) - .addImm(0) /* src2 */ - .addImm(0) /* ABS */ - .addImm(0) /* CLAMP */ - .addImm(0) /* OMOD */ - .addImm(2); /* NEG */ + case AMDGPU::V_SUB_F64: { + unsigned DestReg = MI->getOperand(0).getReg(); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg) + .addImm(0) // SRC0 modifiers + .addReg(MI->getOperand(1).getReg()) + .addImm(1) // SRC1 modifiers + .addReg(MI->getOperand(2).getReg()) + .addImm(0) // SRC2 modifiers + .addImm(0) // src2 + .addImm(0) // CLAMP + .addImm(0); // OMOD MI->eraseFromParent(); break; - + } case AMDGPU::SI_RegisterStorePseudo: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index a17fed7e7ea7..173332674ff1 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -341,6 +341,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { return Result; } +// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" +// around other non-memory instructions. 
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { bool Changes = false; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 168eff25bb22..7cae9fc0d0eb 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -51,6 +51,16 @@ class Enc64 pattern> : let Size = 8; } +class VOP3Common pattern> : + Enc64 { + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + let VOP3 = 1; +} + //===----------------------------------------------------------------------===// // Scalar operations //===----------------------------------------------------------------------===// @@ -207,7 +217,7 @@ class VOP2 op, dag outs, dag ins, string asm, list pattern> : } class VOP3 op, dag outs, dag ins, string asm, list pattern> : - Enc64 { + VOP3Common { bits<8> dst; bits<2> src0_modifiers; @@ -233,16 +243,11 @@ class VOP3 op, dag outs, dag ins, string asm, list pattern> : let Inst{61} = src0_modifiers{0}; let Inst{62} = src1_modifiers{0}; let Inst{63} = src2_modifiers{0}; - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let VOP3 = 1; + } class VOP3b op, dag outs, dag ins, string asm, list pattern> : - Enc64 { + VOP3Common { bits<8> dst; bits<2> src0_modifiers; @@ -266,11 +271,6 @@ class VOP3b op, dag outs, dag ins, string asm, list pattern> : let Inst{62} = src1_modifiers{0}; let Inst{63} = src2_modifiers{0}; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let VOP3 = 1; } class VOPC op, dag ins, string asm, list pattern> : diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index bb13c3eb86a0..f5b82d53ba7a 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -664,6 +664,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFE_U32: 
return AMDGPU::V_BFE_U32; case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; + case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; @@ -679,6 +680,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_LOAD_DWORDX4_IMM: case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; + case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; + case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; } } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index bfd514766acf..c4994a250a8e 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -266,7 +266,7 @@ class SIMCInstr { multiclass VOP3_m op, dag outs, dag ins, string asm, list pattern, string opName> { - def "" : InstSI , VOP , + def "" : VOP3Common , VOP , SIMCInstr { let isPseudo = 1; } @@ -363,12 +363,13 @@ multiclass VOP2b_32 op, string opName, list pattern, } multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, - string opName, ValueType vt, PatLeaf cond> { - + string opName, ValueType vt, PatLeaf cond, bit defExec = 0> { def _e32 : VOPC < op, (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", [] - >, VOP ; + >, VOP { + let Defs = !if(defExec, [VCC, EXEC], [VCC]); + } def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -381,6 +382,7 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))] ) >, VOP { + let Defs = !if(defExec, [EXEC], []); let src2 = SIOperand.ZERO; let src2_modifiers = 0; } @@ -394,6 +396,14 @@ multiclass VOPC_64 op, string opName, ValueType vt = untyped, PatLeaf cond = COND_NULL> : VOPC_Helper ; +multiclass VOPCX_32 op, string opName, 
+ ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper ; + +multiclass VOPCX_64 op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper ; + multiclass VOP3_32 op, string opName, list pattern> : VOP3_m < op, (outs VReg_32:$dst), (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers, @@ -416,9 +426,11 @@ class VOP3_64_32 op, string opName, list pattern> : VOP3 < class VOP3_64 op, string opName, list pattern> : VOP3 < op, (outs VReg_64:$dst), - (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, - InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), - opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern + (ins InputMods:$src0_modifiers, VSrc_64:$src0, + InputMods:$src1_modifiers, VSrc_64:$src1, + InputMods:$src2_modifiers, VSrc_64:$src2, + InstFlag:$clamp, InstFlag:$omod), + opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern >, VOP ; //===----------------------------------------------------------------------===// @@ -700,6 +712,53 @@ multiclass MIMG_Sampler op, string asm> { defm _V4 : MIMG_Sampler_Src_Helper; } +class MIMG_Gather_Helper op, string asm, + RegisterClass dst_rc, + RegisterClass src_rc> : MIMG < + op, + (outs dst_rc:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, + SReg_256:$srsrc, SReg_128:$ssamp), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", + []> { + let mayLoad = 1; + let mayStore = 0; + + // DMASK was repurposed for GATHER4. 4 components are always + // returned and DMASK works like a swizzle - it selects + // the component to fetch. The only useful DMASK values are + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns + // (red,red,red,red) etc.) The ISA document doesn't mention + // this. 
+ // Therefore, disable all code which updates DMASK by setting these two: + let MIMG = 0; + let hasPostISelHook = 0; +} + +multiclass MIMG_Gather_Src_Helper op, string asm, + RegisterClass dst_rc, + int channels> { + def _V1 : MIMG_Gather_Helper , + MIMG_Mask; + def _V2 : MIMG_Gather_Helper , + MIMG_Mask; + def _V4 : MIMG_Gather_Helper , + MIMG_Mask; + def _V8 : MIMG_Gather_Helper , + MIMG_Mask; + def _V16 : MIMG_Gather_Helper , + MIMG_Mask; +} + +multiclass MIMG_Gather op, string asm> { + defm _V1 : MIMG_Gather_Src_Helper; + defm _V2 : MIMG_Gather_Src_Helper; + defm _V3 : MIMG_Gather_Src_Helper; + defm _V4 : MIMG_Gather_Src_Helper; +} + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index be7d8a6d4ab9..26024dc4f2df 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -101,7 +101,9 @@ def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", >; def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; -def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; +def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", + [(set i32:$dst, (AMDGPUbrev i32:$src0))] +>; def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; } // End neverHasSideEffects = 1 @@ -112,11 +114,17 @@ def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32", >; def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>; -////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; +////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>; ////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; -////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; +def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32", + [(set i32:$dst, (cttz_zero_undef 
i32:$src0))] +>; ////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; -//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>; + +def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", + [(set i32:$dst, (ctlz_zero_undef i32:$src0))] +>; + //def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; //def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; @@ -325,7 +333,7 @@ def S_CMPK_EQ_I32 : SOPK < >; */ -let isCompare = 1 in { +let isCompare = 1, Defs = [SCC] in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; @@ -337,7 +345,7 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; -} // End isCompare = 1 +} // End isCompare = 1, Defs = [SCC] let Defs = [SCC], isCommutable = 1 in { def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; @@ -472,26 +480,26 @@ defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>; defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">; defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">; -defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">; -defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">; -defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">; -defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">; -defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">; -defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">; -defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">; -defm V_CMPX_U_F32 
: VOPC_32 <0x00000018, "V_CMPX_U_F32">; -defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">; -defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">; -defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">; -defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">; -defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">; -defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">; -defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; +defm V_CMPX_F_F32 : VOPCX_32 <0x00000010, "V_CMPX_F_F32">; +defm V_CMPX_LT_F32 : VOPCX_32 <0x00000011, "V_CMPX_LT_F32">; +defm V_CMPX_EQ_F32 : VOPCX_32 <0x00000012, "V_CMPX_EQ_F32">; +defm V_CMPX_LE_F32 : VOPCX_32 <0x00000013, "V_CMPX_LE_F32">; +defm V_CMPX_GT_F32 : VOPCX_32 <0x00000014, "V_CMPX_GT_F32">; +defm V_CMPX_LG_F32 : VOPCX_32 <0x00000015, "V_CMPX_LG_F32">; +defm V_CMPX_GE_F32 : VOPCX_32 <0x00000016, "V_CMPX_GE_F32">; +defm V_CMPX_O_F32 : VOPCX_32 <0x00000017, "V_CMPX_O_F32">; +defm V_CMPX_U_F32 : VOPCX_32 <0x00000018, "V_CMPX_U_F32">; +defm V_CMPX_NGE_F32 : VOPCX_32 <0x00000019, "V_CMPX_NGE_F32">; +defm V_CMPX_NLG_F32 : VOPCX_32 <0x0000001a, "V_CMPX_NLG_F32">; +defm V_CMPX_NGT_F32 : VOPCX_32 <0x0000001b, "V_CMPX_NGT_F32">; +defm V_CMPX_NLE_F32 : VOPCX_32 <0x0000001c, "V_CMPX_NLE_F32">; +defm V_CMPX_NEQ_F32 : VOPCX_32 <0x0000001d, "V_CMPX_NEQ_F32">; +defm V_CMPX_NLT_F32 : VOPCX_32 <0x0000001e, "V_CMPX_NLT_F32">; +defm V_CMPX_TRU_F32 : VOPCX_32 <0x0000001f, "V_CMPX_TRU_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>; @@ -510,26 +518,26 @@ defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>; defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_F64 : 
VOPC_64 <0x00000030, "V_CMPX_F_F64">; -defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">; -defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">; -defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">; -defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">; -defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">; -defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">; -defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">; -defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">; -defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">; -defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">; -defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">; -defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">; -defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">; -defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">; -defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">; +defm V_CMPX_F_F64 : VOPCX_64 <0x00000030, "V_CMPX_F_F64">; +defm V_CMPX_LT_F64 : VOPCX_64 <0x00000031, "V_CMPX_LT_F64">; +defm V_CMPX_EQ_F64 : VOPCX_64 <0x00000032, "V_CMPX_EQ_F64">; +defm V_CMPX_LE_F64 : VOPCX_64 <0x00000033, "V_CMPX_LE_F64">; +defm V_CMPX_GT_F64 : VOPCX_64 <0x00000034, "V_CMPX_GT_F64">; +defm V_CMPX_LG_F64 : VOPCX_64 <0x00000035, "V_CMPX_LG_F64">; +defm V_CMPX_GE_F64 : VOPCX_64 <0x00000036, "V_CMPX_GE_F64">; +defm V_CMPX_O_F64 : VOPCX_64 <0x00000037, "V_CMPX_O_F64">; +defm V_CMPX_U_F64 : VOPCX_64 <0x00000038, "V_CMPX_U_F64">; +defm V_CMPX_NGE_F64 : VOPCX_64 <0x00000039, "V_CMPX_NGE_F64">; +defm V_CMPX_NLG_F64 : VOPCX_64 <0x0000003a, "V_CMPX_NLG_F64">; +defm V_CMPX_NGT_F64 : VOPCX_64 <0x0000003b, "V_CMPX_NGT_F64">; +defm V_CMPX_NLE_F64 : VOPCX_64 <0x0000003c, "V_CMPX_NLE_F64">; +defm V_CMPX_NEQ_F64 : VOPCX_64 <0x0000003d, "V_CMPX_NEQ_F64">; +defm V_CMPX_NLT_F64 : VOPCX_64 <0x0000003e, "V_CMPX_NLT_F64">; +defm V_CMPX_TRU_F64 : VOPCX_64 <0x0000003f, "V_CMPX_TRU_F64">; -} // End hasSideEffects = 1, Defs = 
[EXEC] +} // End hasSideEffects = 1 defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">; defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">; @@ -548,26 +556,26 @@ defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">; defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">; defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">; -defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">; -defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">; -defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">; -defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">; -defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">; -defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">; -defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">; -defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">; -defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">; -defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">; -defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">; -defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">; -defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">; -defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">; -defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">; +defm V_CMPSX_F_F32 : VOPCX_32 <0x00000050, "V_CMPSX_F_F32">; +defm V_CMPSX_LT_F32 : VOPCX_32 <0x00000051, "V_CMPSX_LT_F32">; +defm V_CMPSX_EQ_F32 : VOPCX_32 <0x00000052, "V_CMPSX_EQ_F32">; +defm V_CMPSX_LE_F32 : VOPCX_32 <0x00000053, "V_CMPSX_LE_F32">; +defm V_CMPSX_GT_F32 : VOPCX_32 <0x00000054, "V_CMPSX_GT_F32">; +defm V_CMPSX_LG_F32 : VOPCX_32 <0x00000055, "V_CMPSX_LG_F32">; +defm V_CMPSX_GE_F32 : VOPCX_32 <0x00000056, "V_CMPSX_GE_F32">; +defm V_CMPSX_O_F32 : VOPCX_32 <0x00000057, "V_CMPSX_O_F32">; +defm V_CMPSX_U_F32 : VOPCX_32 
<0x00000058, "V_CMPSX_U_F32">; +defm V_CMPSX_NGE_F32 : VOPCX_32 <0x00000059, "V_CMPSX_NGE_F32">; +defm V_CMPSX_NLG_F32 : VOPCX_32 <0x0000005a, "V_CMPSX_NLG_F32">; +defm V_CMPSX_NGT_F32 : VOPCX_32 <0x0000005b, "V_CMPSX_NGT_F32">; +defm V_CMPSX_NLE_F32 : VOPCX_32 <0x0000005c, "V_CMPSX_NLE_F32">; +defm V_CMPSX_NEQ_F32 : VOPCX_32 <0x0000005d, "V_CMPSX_NEQ_F32">; +defm V_CMPSX_NLT_F32 : VOPCX_32 <0x0000005e, "V_CMPSX_NLT_F32">; +defm V_CMPSX_TRU_F32 : VOPCX_32 <0x0000005f, "V_CMPSX_TRU_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">; defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">; @@ -616,18 +624,18 @@ defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>; defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>; defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">; -defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">; -defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">; -defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">; -defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">; -defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">; -defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">; -defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">; +defm V_CMPX_F_I32 : VOPCX_32 <0x00000090, "V_CMPX_F_I32">; +defm V_CMPX_LT_I32 : VOPCX_32 <0x00000091, "V_CMPX_LT_I32">; +defm V_CMPX_EQ_I32 : VOPCX_32 <0x00000092, "V_CMPX_EQ_I32">; +defm V_CMPX_LE_I32 : VOPCX_32 <0x00000093, "V_CMPX_LE_I32">; +defm V_CMPX_GT_I32 : VOPCX_32 <0x00000094, "V_CMPX_GT_I32">; +defm V_CMPX_NE_I32 : VOPCX_32 <0x00000095, "V_CMPX_NE_I32">; +defm V_CMPX_GE_I32 : VOPCX_32 <0x00000096, "V_CMPX_GE_I32">; +defm V_CMPX_T_I32 : VOPCX_32 <0x00000097, "V_CMPX_T_I32">; -} // End hasSideEffects = 1, Defs = [EXEC] 
+} // End hasSideEffects = 1 defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">; defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>; @@ -638,18 +646,18 @@ defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>; defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>; defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">; -defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">; -defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">; -defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">; -defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">; -defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">; -defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">; -defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">; +defm V_CMPX_F_I64 : VOPCX_64 <0x000000b0, "V_CMPX_F_I64">; +defm V_CMPX_LT_I64 : VOPCX_64 <0x000000b1, "V_CMPX_LT_I64">; +defm V_CMPX_EQ_I64 : VOPCX_64 <0x000000b2, "V_CMPX_EQ_I64">; +defm V_CMPX_LE_I64 : VOPCX_64 <0x000000b3, "V_CMPX_LE_I64">; +defm V_CMPX_GT_I64 : VOPCX_64 <0x000000b4, "V_CMPX_GT_I64">; +defm V_CMPX_NE_I64 : VOPCX_64 <0x000000b5, "V_CMPX_NE_I64">; +defm V_CMPX_GE_I64 : VOPCX_64 <0x000000b6, "V_CMPX_GE_I64">; +defm V_CMPX_T_I64 : VOPCX_64 <0x000000b7, "V_CMPX_T_I64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">; defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>; @@ -660,18 +668,18 @@ defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>; defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>; defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">; -defm 
V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">; -defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">; -defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">; -defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">; -defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">; -defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">; -defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">; +defm V_CMPX_F_U32 : VOPCX_32 <0x000000d0, "V_CMPX_F_U32">; +defm V_CMPX_LT_U32 : VOPCX_32 <0x000000d1, "V_CMPX_LT_U32">; +defm V_CMPX_EQ_U32 : VOPCX_32 <0x000000d2, "V_CMPX_EQ_U32">; +defm V_CMPX_LE_U32 : VOPCX_32 <0x000000d3, "V_CMPX_LE_U32">; +defm V_CMPX_GT_U32 : VOPCX_32 <0x000000d4, "V_CMPX_GT_U32">; +defm V_CMPX_NE_U32 : VOPCX_32 <0x000000d5, "V_CMPX_NE_U32">; +defm V_CMPX_GE_U32 : VOPCX_32 <0x000000d6, "V_CMPX_GE_U32">; +defm V_CMPX_T_U32 : VOPCX_32 <0x000000d7, "V_CMPX_T_U32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">; defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>; @@ -682,30 +690,30 @@ defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>; defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>; defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">; -defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">; -defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">; -defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">; -defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">; -defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">; -defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">; -defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">; +defm V_CMPX_F_U64 : VOPCX_64 <0x000000f0, "V_CMPX_F_U64">; +defm V_CMPX_LT_U64 : VOPCX_64 <0x000000f1, 
"V_CMPX_LT_U64">; +defm V_CMPX_EQ_U64 : VOPCX_64 <0x000000f2, "V_CMPX_EQ_U64">; +defm V_CMPX_LE_U64 : VOPCX_64 <0x000000f3, "V_CMPX_LE_U64">; +defm V_CMPX_GT_U64 : VOPCX_64 <0x000000f4, "V_CMPX_GT_U64">; +defm V_CMPX_NE_U64 : VOPCX_64 <0x000000f5, "V_CMPX_NE_U64">; +defm V_CMPX_GE_U64 : VOPCX_64 <0x000000f6, "V_CMPX_GE_U64">; +defm V_CMPX_T_U64 : VOPCX_64 <0x000000f7, "V_CMPX_T_U64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { -defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +let hasSideEffects = 1 in { +defm V_CMPX_CLASS_F32 : VOPCX_32 <0x00000098, "V_CMPX_CLASS_F32">; +} // End hasSideEffects = 1 defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">; -let hasSideEffects = 1, Defs = [EXEC] in { -defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; -} // End hasSideEffects = 1, Defs = [EXEC] +let hasSideEffects = 1 in { +defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">; +} // End hasSideEffects = 1 } // End isCompare = 1 @@ -979,31 +987,31 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 
0x00000047>; -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; -//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; +defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">; +defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">; +defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">; +defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">; +defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, 
"IMAGE_GATHER4_B_CL">; +defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">; +defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">; +defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">; +defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">; +defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">; +defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">; +defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">; +defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">; +defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">; +defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">; +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">; +defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">; +defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">; +defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">; +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">; +defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">; //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; //def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; @@ -1108,22 +1116,23 @@ defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", [(set f32:$dst, (flog2 f32:$src0))] >; + defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; 
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] + [(set f32:$dst, (AMDGPUrcp f32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] + [(set f32:$dst, (AMDGPUrsq f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))] >; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", - [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] + [(set f64:$dst, (AMDGPUrcp f64:$src0))] >; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", @@ -1409,8 +1418,12 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; //def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; -defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; -def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; +defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", + [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", + [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))] +>; def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64", [(set i64:$dst, (shl i64:$src0, i32:$src1))] @@ -1444,12 +1457,19 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; -def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, 
"V_DIV_FMAS_F64", []>; + +defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", + [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", + [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))] +>; //def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; -def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; +def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64", + [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))] +>; //===----------------------------------------------------------------------===// // Pseudo Instructions @@ -1731,6 +1751,24 @@ def : Pat < (S_XOR_B64 $src0, $src1) >; +//===----------------------------------------------------------------------===// +// SOPP Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (int_AMDGPU_barrier_global), + (S_BARRIER) +>; + +//===----------------------------------------------------------------------===// +// VOP1 Patterns +//===----------------------------------------------------------------------===// + +def : RcpPat; +def : RcpPat; +def : RsqPat; +def : RsqPat; + //===----------------------------------------------------------------------===// // VOP2 Patterns //===----------------------------------------------------------------------===// @@ -1756,6 +1794,58 @@ def : SextInReg ; /********** Image sampling patterns **********/ /********** ======================= **********/ +class SampleRawPattern : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, i32:$dmask, i32:$unorm, + i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), + (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da), + (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc), + $addr, $rsrc, $sampler) +>; + 
+// Only the variants which make sense are defined. +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + +def : SampleRawPattern; +def : SampleRawPattern; +def : SampleRawPattern; + /* SIsample for simple 1D texture lookup */ def : Pat < (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), @@ -2551,13 +2641,13 @@ multiclass SI_INDIRECT_Pattern ; // 2. Extract without offset def : Pat< (vector_extract vt:$vec, i32:$idx), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) + (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) >; // 3. Insert with offset diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 00e32c03a99e..df690a4f4b87 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -56,11 +56,61 @@ let TargetPrefix = "SI", isTarget = 1 in { class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; + // Fully-flexible SAMPLE instruction. 
+ class SampleRaw : Intrinsic < + [llvm_v4f32_ty], // vdata(VGPR) + [llvm_anyint_ty, // vaddr(VGPR) + llvm_v32i8_ty, // rsrc(SGPR) + llvm_v16i8_ty, // sampler(SGPR) + llvm_i32_ty, // dmask(imm) + llvm_i32_ty, // unorm(imm) + llvm_i32_ty, // r128(imm) + llvm_i32_ty, // da(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty, // tfe(imm) + llvm_i32_ty], // lwe(imm) + [IntrNoMem]>; + def int_SI_sample : Sample; def int_SI_sampleb : Sample; def int_SI_sampled : Sample; def int_SI_samplel : Sample; + // Basic gather4 + def int_SI_gather4 : SampleRaw; + def int_SI_gather4_cl : SampleRaw; + def int_SI_gather4_l : SampleRaw; + def int_SI_gather4_b : SampleRaw; + def int_SI_gather4_b_cl : SampleRaw; + def int_SI_gather4_lz : SampleRaw; + + // Gather4 with comparison + def int_SI_gather4_c : SampleRaw; + def int_SI_gather4_c_cl : SampleRaw; + def int_SI_gather4_c_l : SampleRaw; + def int_SI_gather4_c_b : SampleRaw; + def int_SI_gather4_c_b_cl : SampleRaw; + def int_SI_gather4_c_lz : SampleRaw; + + // Gather4 with offsets + def int_SI_gather4_o : SampleRaw; + def int_SI_gather4_cl_o : SampleRaw; + def int_SI_gather4_l_o : SampleRaw; + def int_SI_gather4_b_o : SampleRaw; + def int_SI_gather4_b_cl_o : SampleRaw; + def int_SI_gather4_lz_o : SampleRaw; + + // Gather4 with comparison and offsets + def int_SI_gather4_c_o : SampleRaw; + def int_SI_gather4_c_cl_o : SampleRaw; + def int_SI_gather4_c_l_o : SampleRaw; + def int_SI_gather4_c_b_o : SampleRaw; + def int_SI_gather4_c_b_cl_o : SampleRaw; + def int_SI_gather4_c_lz_o : SampleRaw; + + def int_SI_getlod : SampleRaw; + def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 6601f2a98065..9f5ff29ad93a 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp 
+++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -86,6 +86,7 @@ class SILowerControlFlowPass : public MachineFunctionPass { void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void InitM0ForLDS(MachineBasicBlock::iterator MI); void LoadM0(MachineInstr &MI, MachineInstr *MovRel); void IndirectSrc(MachineInstr &MI); void IndirectDst(MachineInstr &MI); @@ -320,6 +321,14 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +/// The m0 register stores the maximum allowable address for LDS reads and +/// writes. Its value must be at least the size in bytes of LDS allocated by +/// the shader. For simplicity, we set it to the maximum possible value. +void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), + AMDGPU::M0).addImm(0xffffffff); +} + void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MachineBasicBlock &MBB = *MI.getParent(); @@ -333,52 +342,57 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx); MBB.insert(I, MovRel); - MI.eraseFromParent(); - return; - } + } else { - assert(AMDGPU::SReg_64RegClass.contains(Save)); - assert(AMDGPU::VReg_32RegClass.contains(Idx)); + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); - // Save the EXEC mask - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) - .addReg(AMDGPU::EXEC); + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); - // Read the next variant into VCC (lower 32 bits) <- also loop target - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - AMDGPU::VCC_LO) - .addReg(Idx); + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + AMDGPU::VCC_LO) + 
.addReg(Idx); - // Move index from VCC into M0 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .addReg(AMDGPU::VCC_LO); + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC_LO); - // Compare the just read M0 value to all possible Idx values - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) - .addReg(AMDGPU::M0) - .addReg(Idx); + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); - // Update EXEC, save the original EXEC value to VCC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) - .addReg(AMDGPU::VCC); + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); - // Do the actual move - MBB.insert(I, MovRel); + // Do the actual move + MBB.insert(I, MovRel); - // Update EXEC, switch all done bits to 0 and all todo bits to 1 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) - .addReg(AMDGPU::VCC); + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); - // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(-7) - .addReg(AMDGPU::EXEC); + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); - // Restore EXEC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) - .addReg(Save); + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + } + // FIXME: Are there any values other than the LDS address 
clamp that need to + // be stored in the m0 register and may be live for more than a few + // instructions? If so, we should save the m0 register at the beginning + // of this function and restore it here. + // FIXME: Add support for LDS direct loads. + InitM0ForLDS(&MI); MI.eraseFromParent(); } @@ -523,8 +537,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock &MBB = MF.front(); // Initialize M0 to a value that won't cause LDS access to be discarded // due to offset clamping - BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0).addImm(0xffffffff); + InitM0ForLDS(MBB.getFirstNonPHI()); } if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) { diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index f1f01deaf361..8974b6300625 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64Regs, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 79075b06bdcd..9df005401897 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -330,7 +330,7 @@ class SparcOperand : public MCParsedAsmOperand { unsigned Reg = Op.getReg(); unsigned regIdx = 0; switch (Op.Reg.Kind) { - default: assert(0 && "Unexpected register kind!"); + default: llvm_unreachable("Unexpected register kind!"); case rk_FloatReg: regIdx = Reg - Sparc::F0; if (regIdx % 4 || regIdx > 31) diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 
261fb3838d37..5975a517994a 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -173,6 +173,6 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, raw_ostream &O) { - assert(0 && "FIXME: Implement SparcInstPrinter::printGetPCX."); + llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX."); return true; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 7d517b699b22..dcd81e3d6249 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -196,12 +196,12 @@ namespace { const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { // FIXME. - assert(0 && "fixupNeedsRelaxation() unimplemented"); + llvm_unreachable("fixupNeedsRelaxation() unimplemented"); return false; } void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { // FIXME. 
- assert(0 && "relaxInstruction() unimplemented"); + llvm_unreachable("relaxInstruction() unimplemented"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index b19ad7b45ca6..eea9626c17b0 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -133,7 +133,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, if (Expr->EvaluateAsAbsolute(Res)) return Res; - assert(0 && "Unhandled expression!"); + llvm_unreachable("Unhandled expression!"); return 0; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index ae57fdc4eebd..3ccdd038fb33 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -124,7 +124,7 @@ SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name) Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) { switch (Kind) { - default: assert(0 && "Unhandled SparcMCExpr::VariantKind"); + default: llvm_unreachable("Unhandled SparcMCExpr::VariantKind"); case VK_Sparc_LO: return Sparc::fixup_sparc_lo10; case VK_Sparc_HI: return Sparc::fixup_sparc_hi22; case VK_Sparc_H44: return Sparc::fixup_sparc_h44; diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp index c775e9e83ee6..d0eec98b5e91 100644 --- a/lib/Target/Sparc/SparcJITInfo.cpp +++ b/lib/Target/Sparc/SparcJITInfo.cpp @@ -213,7 +213,8 @@ extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) { void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - assert(0 && "FIXME: Implement SparcJITInfo::replaceMachineCodeForFunction"); + llvm_unreachable("FIXME: Implement SparcJITInfo::" + "replaceMachineCodeForFunction"); } diff --git a/lib/Target/TargetSubtargetInfo.cpp 
b/lib/Target/TargetSubtargetInfo.cpp index 0c388f8fb26c..ce00fcc62737 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -39,6 +39,10 @@ bool TargetSubtargetInfo::useMachineScheduler() const { return enableMachineScheduler(); } +bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const { + return true; +} + bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index b30eeeb0e037..3e57914f9e94 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1061,7 +1061,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; - switch (getLexer().getKind()) { + AsmToken::TokenKind TK = getLexer().getKind(); + switch (TK) { default: { if (SM.isValidEndState()) { Done = true; @@ -1073,13 +1074,14 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { Done = true; break; } + case AsmToken::String: case AsmToken::Identifier: { // This could be a register or a symbolic displacement. unsigned TmpReg; const MCExpr *Val; SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); - if(!ParseRegister(TmpReg, IdentLoc, End)) { + if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 80cc99bb80cc..6bccd1290fec 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -155,6 +155,46 @@ class X86FastISel final : public FastISel { } // end anonymous namespace. +static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) { + // If both operands are the same, then try to optimize or fold the cmp. 
+ CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; + } + + return Predicate; +} + static std::pair getX86ConditonCode(CmpInst::Predicate Predicate) { 
X86::CondCode CC = X86::COND_INVALID; @@ -1048,21 +1088,61 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + unsigned ResultReg = 0; + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: { + ResultReg = createResultReg(&X86::GR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0), + ResultReg); + ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, + X86::sub_8bit); + if (!ResultReg) + return false; + break; + } + case CmpInst::FCMP_TRUE: { + ResultReg = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), + ResultReg).addImm(1); + break; + } + } + + if (ResultReg) { + UpdateValueMap(I, ResultReg); + return true; + } + + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. + // We don't have to materialize a zero constant for this case and can just use + // %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *RHSC = dyn_cast(RHS); + if (RHSC && RHSC->isNullValue()) + RHS = LHS; + } + // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
- static unsigned SETFOpcTable[2][2] = { - { X86::SETEr, X86::SETNPr }, - { X86::SETNEr, X86::SETPr } + static unsigned SETFOpcTable[2][3] = { + { X86::SETEr, X86::SETNPr, X86::AND8rr }, + { X86::SETNEr, X86::SETPr, X86::OR8rr } }; unsigned *SETFOpc = nullptr; - switch (CI->getPredicate()) { + switch (Predicate) { default: break; case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; } - unsigned ResultReg = createResultReg(&X86::GR8RegClass); + ResultReg = createResultReg(&X86::GR8RegClass); if (SETFOpc) { - if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) + if (!X86FastEmitCompare(LHS, RHS, VT)) return false; unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); @@ -1071,24 +1151,22 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { FlagReg1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), FlagReg2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::AND8rr), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]), ResultReg).addReg(FlagReg1).addReg(FlagReg2); UpdateValueMap(I, ResultReg); return true; } X86::CondCode CC; - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - std::tie(CC, SwapArgs) = getX86ConditonCode(CI->getPredicate()); + bool SwapArgs; + std::tie(CC, SwapArgs) = getX86ConditonCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected conditon code."); unsigned Opc = X86::getSETFromCond(CC); - const Value *LHS = CI->getOperand(0); - const Value *RHS = CI->getOperand(1); if (SwapArgs) std::swap(LHS, RHS); - // Emit a compare of Op0/Op1. + // Emit a compare of LHS/RHS. if (!X86FastEmitCompare(LHS, RHS, VT)) return false; @@ -1162,8 +1240,28 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + // Try to optimize or fold the cmp. 
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true; + case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true; + } + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, + // 0.0. + // We don't have to materialize a zero constant for this case and can just + // use %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *CmpRHSC = dyn_cast(CmpRHS); + if (CmpRHSC && CmpRHSC->isNullValue()) + CmpRHS = CmpLHS; + } + // Try to take advantage of fallthrough opportunities. - CmpInst::Predicate Predicate = CI->getPredicate(); if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); @@ -1186,14 +1284,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } X86::CondCode CC; - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA" + bool SwapArgs; + unsigned BranchOpc; std::tie(CC, SwapArgs) = getX86ConditonCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected conditon code."); BranchOpc = X86::GetCondBranchFromCond(CC); - const Value *CmpLHS = CI->getOperand(0); - const Value *CmpRHS = CI->getOperand(1); if (SwapArgs) std::swap(CmpLHS, CmpRHS); @@ -1219,7 +1315,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); // Emits an unconditional branch to the FalseBB, obtains the branch - // weight, andd adds it to the successor list. + // weight, and adds it to the successor list. 
FastEmitBranch(FalseMBB, DbgLoc); return true; @@ -2717,15 +2813,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { return 0; } - // Materialize addresses with LEA instructions. + // Materialize addresses with LEA/MOV instructions. if (isa(C)) { - // LEA can only handle 32 bit immediates. Currently this happens pretty - // rarely, so rather than deal with it just bail out of fast isel. If any - // architectures endis up needing to use this path a lot then fast isel - // could get the address with a MOV64ri and use that to load the value. - if (TM.getRelocationModel() == Reloc::Static && Subtarget->is64Bit()) - return false; - X86AddressMode AM; if (X86SelectAddress(C, AM)) { // If the expression is just a basereg, then we're done, otherwise we need @@ -2734,10 +2823,19 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr) return AM.Base.Reg; - Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r; unsigned ResultReg = createResultReg(RC); - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + if (TM.getRelocationModel() == Reloc::Static && + TLI.getPointerTy() == MVT::i64) { + // The displacement could be more than 32 bits away so we need to use + // an instruction with a 64 bit immediate + Opc = X86::MOV64ri; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), ResultReg).addGlobalAddress(cast(C)); + } else { + Opc = TLI.getPointerTy() == MVT::i32 ? 
X86::LEA32r : X86::LEA64r; + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); + } return ResultReg; } return 0; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 851607eac96e..09018a5e1187 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6077,21 +6077,35 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { /// This function only analyzes elements of \p N whose indices are /// in range [BaseIdx, LastIdx). static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, + SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1) { + EVT VT = N->getValueType(0); + assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); - assert(N->getValueType(0).isVector() && - N->getValueType(0).getVectorNumElements() >= LastIdx && + assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && "Invalid Vector in input!"); bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); bool CanFold = true; unsigned ExpectedVExtractIdx = BaseIdx; unsigned NumElts = LastIdx - BaseIdx; + V0 = DAG.getUNDEF(VT); + V1 = DAG.getUNDEF(VT); // Check if N implements a horizontal binop. for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) { SDValue Op = N->getOperand(i + BaseIdx); + + // Skip UNDEFs. + if (Op->getOpcode() == ISD::UNDEF) { + // Update the expected vector extract index. 
+ if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; + ExpectedVExtractIdx += 2; + continue; + } + CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); if (!CanFold) @@ -6112,12 +6126,15 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, unsigned I0 = cast(Op0.getOperand(1))->getZExtValue(); unsigned I1 = cast(Op1.getOperand(1))->getZExtValue(); - - if (i == 0) - V0 = Op0.getOperand(0); - else if (i * 2 == NumElts) { - V1 = Op0.getOperand(0); - ExpectedVExtractIdx = BaseIdx; + + if (i * 2 < NumElts) { + if (V0.getOpcode() == ISD::UNDEF) + V0 = Op0.getOperand(0); + } else { + if (V1.getOpcode() == ISD::UNDEF) + V1 = Op0.getOperand(0); + if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; } SDValue Expected = (i * 2 < NumElts) ? V0 : V1; @@ -6163,9 +6180,14 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, /// Example: /// HADD V0_LO, V1_LO /// HADD V0_HI, V1_HI +/// +/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower +/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to +/// the upper 128-bits of the result. static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDLoc DL, SelectionDAG &DAG, - unsigned X86Opcode, bool Mode) { + unsigned X86Opcode, bool Mode, + bool isUndefLO, bool isUndefHI) { EVT VT = V0.getValueType(); assert(VT.is256BitVector() && VT == V1.getValueType() && "Invalid nodes in input!"); @@ -6177,13 +6199,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL); EVT NewVT = V0_LO.getValueType(); - SDValue LO, HI; + SDValue LO = DAG.getUNDEF(NewVT); + SDValue HI = DAG.getUNDEF(NewVT); + if (Mode) { - LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); - HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); + // Don't emit a horizontal binop if the result is expected to be UNDEF. 
+ if (!isUndefLO && V0->getOpcode() != ISD::UNDEF) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); + if (!isUndefHI && V1->getOpcode() != ISD::UNDEF) + HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); } else { - LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); - HI = DAG.getNode(X86Opcode, DL, NewVT, V1_HI, V1_HI); + // Don't emit a horizontal binop if the result is expected to be UNDEF. + if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF || + V1_LO->getOpcode() != ISD::UNDEF)) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); + + if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF || + V1_HI->getOpcode() != ISD::UNDEF)) + HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); } return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); @@ -6198,19 +6231,37 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, SDValue InVec0, InVec1; // Try to match horizontal ADD/SUB. + unsigned NumUndefsLO = 0; + unsigned NumUndefsHI = 0; + unsigned Half = NumElts/2; + + // Count the number of UNDEF operands in the build_vector in input. + for (unsigned i = 0, e = Half; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsLO++; + + for (unsigned i = Half, e = NumElts; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsHI++; + + // Early exit if this is either a build_vector of all UNDEFs or all the + // operands but one are UNDEF. + if (NumUndefsLO + NumUndefsHI + 1 >= NumElts) + return SDValue(); + if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { // Try to match an SSE3 float HADD/HSUB. 
- if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) { // Try to match an SSSE3 integer HADD/HSUB. - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1); } @@ -6221,16 +6272,20 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, // Try to match an AVX horizontal add/sub of packed single/double // precision floating point values from 256-bit vectors. 
SDValue InVec2, InVec3; - if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::FADD, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); - if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::FSUB, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if (VT == MVT::v8i32 || VT == MVT::v16i16) { // Try to match an AVX2 horizontal add/sub of signed integers. 
@@ -6238,15 +6293,19 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, unsigned X86Opcode; bool CanFold = true; - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::ADD, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts/2, InVec0, InVec1) && - isHorizontalBinOp(BV, ISD::SUB, NumElts/2, NumElts, InVec2, InVec3) && - InVec0.getNode() == InVec2.getNode() && - InVec1.getNode() == InVec3.getNode()) + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) X86Opcode = X86ISD::HSUB; else CanFold = false; @@ -6257,29 +6316,45 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, if (Subtarget->hasAVX2()) return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1); + // Do not try to expand this build_vector into a pair of horizontal + // add/sub if we can emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + // Convert this build_vector into a pair of horizontal binop followed by // a concat vector. 
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false); + bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false, + isUndefLO, isUndefHI); } } if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || VT == MVT::v16i16) && Subtarget->hasAVX()) { unsigned X86Opcode; - if (isHorizontalBinOp(BV, ISD::ADD, 0, NumElts, InVec0, InVec1)) + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOp(BV, ISD::SUB, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::HSUB; - else if (isHorizontalBinOp(BV, ISD::FADD, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHADD; - else if (isHorizontalBinOp(BV, ISD::FSUB, 0, NumElts, InVec0, InVec1)) + else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) X86Opcode = X86ISD::FHSUB; else return SDValue(); + // Don't try to expand this build_vector into a pair of horizontal add/sub + // if we can simply emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + // Convert this build_vector into two horizontal add/sub followed by // a concat vector. 
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true); + bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, + isUndefLO, isUndefHI); } return SDValue(); @@ -12438,6 +12513,20 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::x86_sse2_packssdw_128: + case Intrinsic::x86_sse2_packsswb_128: + case Intrinsic::x86_avx2_packssdw: + case Intrinsic::x86_avx2_packsswb: + return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_packuswb_128: + case Intrinsic::x86_sse41_packusdw: + case Intrinsic::x86_avx2_packuswb: + case Intrinsic::x86_avx2_packusdw: + return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), @@ -15211,6 +15300,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TESTM: return "X86ISD::TESTM"; case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; + case X86ISD::PACKSS: return "X86ISD::PACKSS"; + case X86ISD::PACKUS: return "X86ISD::PACKUS"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index af2e4344e5b6..df9ab3aa0681 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -315,6 +315,8 @@ namespace llvm { KORTEST, // Several flavors of instructions with vector shuffle behaviors. 
+ PACKSS, + PACKUS, PALIGNR, PSHUFD, PSHUFHW, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c132663f473e..7cac5ebbece7 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1798,7 +1798,36 @@ def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))]>, - EVEX, EVEX_V512; + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; + +// Prefer non-temporal over temporal versions +let AddedComplexity = 400, SchedRW = [WriteStore] in { + +def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v16f32 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + +def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + + +def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8i64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; +} //===----------------------------------------------------------------------===// // AVX-512 - Integer arithmetic @@ -3174,6 +3203,10 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; +def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), + (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr + (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; + def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; diff --git 
a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 1582f4388192..6f0fa9462770 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -224,6 +224,10 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; +def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; +def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; + def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 043b2f32c6ff..11c3f11f2cdc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4336,20 +4336,6 @@ defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, SSE_INTALU_ITINS_P, 0>; -//===---------------------------------------------------------------------===// -// SSE2 - Packed Integer Pack Instructions -//===---------------------------------------------------------------------===// - -defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, - int_x86_avx2_packsswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - int_x86_avx2_packssdw, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - int_x86_avx2_packuswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions //===---------------------------------------------------------------------===// @@ -4431,6 +4417,136 @@ let Predicates = [UseSSE2] in { (PSHUFDri VR128:$src1, 
imm:$imm)>; } +//===---------------------------------------------------------------------===// +// Packed Integer Pack Instructions (SSE & AVX) +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { +multiclass sse2_pack opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : PDI, + Sched<[WriteShuffle]>; + def rm : PDI, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse2_pack_y opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { + def Yrr : PDI, + Sched<[WriteShuffle]>; + def Yrm : PDI, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse4_pack opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : SS48I, + Sched<[WriteShuffle]>; + def rm : SS48I, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse4_pack_y opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { + def Yrr : SS48I, + Sched<[WriteShuffle]>; + def Yrm : SS48I, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +let Predicates = [HasAVX] in { + defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, + bc_v8i16, 0>, VEX_4V; + defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, + bc_v4i32, 0>, VEX_4V; + + defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, + bc_v8i16, 0>, VEX_4V; + defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, + bc_v4i32, 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { + defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss, + bc_v16i16>, VEX_4V, VEX_L; + defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, + bc_v8i32>, VEX_4V, VEX_L; + + defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus, + bc_v16i16>, VEX_4V, VEX_L; + defm VPACKUSDW : sse4_pack_y<0x2B, 
"vpackusdw", v16i16, v8i32, X86Packus, + bc_v8i32>, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, + bc_v8i16>; + defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, + bc_v4i32>; + + defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, + bc_v8i16>; + + let Predicates = [HasSSE41] in + defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, + bc_v4i32>; +} +} // ExeDomain = SSEPackedInt + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Unpack Instructions //===---------------------------------------------------------------------===// @@ -7053,8 +7169,6 @@ multiclass SS48I_binop_rm2 opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX] in { let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, - 0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V; defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; @@ -7086,9 +7200,6 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", - int_x86_avx2_packusdw, WriteShuffle>, - VEX_4V, VEX_L; defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; @@ -7120,8 +7231,6 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { let isCommutable = 0 in - defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw, - 1, DEFAULT_ITINS_SHUFFLESCHED>; defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 
299f9a581b8d..9fa911967e7a 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -402,17 +402,47 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only estimate the cost of reverse shuffles. - if (Kind != SK_Reverse) + // We only estimate the cost of reverse and alternate shuffles. + if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - std::pair LT = TLI->getTypeLegalizationCost(Tp); - unsigned Cost = 1; - if (LT.second.getSizeInBits() > 128) - Cost = 3; // Extract + insert + copy. + if (Kind == SK_Reverse) { + std::pair LT = TLI->getTypeLegalizationCost(Tp); + unsigned Cost = 1; + if (LT.second.getSizeInBits() > 128) + Cost = 3; // Extract + insert + copy. - // Multiple by the number of parts. - return Cost * LT.first; + // Multiple by the number of parts. + return Cost * LT.first; + } + + if (Kind == SK_Alternate) { + static const CostTblEntry X86AltShuffleTbl[] = { + // Alt shuffle cost table for X86. Cost is the number of instructions + // required to create the shuffled vector. 
+ + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 8}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 49}}; + + std::pair LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(X86AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * X86AltShuffleTbl[Idx].Cost; + } + + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index f6658964ae85..5b28d8ccb5b2 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -865,30 +865,6 @@ Value *FAddCombine::createAddendVal return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -// dyn_castFoldableMul - If this value is a multiply that can be folded into -// other computations (because it has a constant operand), return the -// non-constant operand of the multiply, and set CST to point to the multiplier. -// Otherwise, return null. -// -static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { - if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return nullptr; - - Instruction *I = dyn_cast(V); - if (!I) return nullptr; - - if (I->getOpcode() == Instruction::Mul) - if ((CST = dyn_cast(I->getOperand(1)))) - return I->getOperand(0); - if (I->getOpcode() == Instruction::Shl) - if ((CST = dyn_cast(I->getOperand(1)))) { - // The multiplier is really 1 << CST. 
- CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); - return I->getOperand(0); - } - return nullptr; -} - // If one of the operands only has one non-zero bit, and if the other // operand has a known-zero bit in a more significant place than it (not // including the sign bit) the ripple may go up to and fill the zero, but @@ -978,6 +954,48 @@ bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) { return true; return false; + } + +// Checks if any operand is negative and we can convert add to sub. +// This function checks for following negative patterns +// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C)) +// TODO: ADD(XOR(AND(Z, ~C), ~C), 1) == NEG(OR(Z, C)) if C is even +// TODO: XOR(AND(Z, ~C), (~C + 1)) == NEG(OR(Z, C)) if C is odd +Value *checkForNegativeOperand(BinaryOperator &I, + InstCombiner::BuilderTy *Builder) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + // This function creates 2 instructions to replace ADD, we need at least one + // of LHS or RHS to have one use to ensure benefit in transform. 
+ if (!LHS->hasOneUse() && !RHS->hasOneUse()) + return nullptr; + + bool IHasNSW = I.hasNoSignedWrap(); + bool IHasNUW = I.hasNoUnsignedWrap(); + + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + const APInt *C1 = nullptr, *C2 = nullptr; + + // if ONE is on other side, swap + if (match(RHS, m_Add(m_Value(X), m_One()))) + std::swap(LHS, RHS); + + if (match(LHS, m_Add(m_Value(X), m_One()))) { + // if XOR on other side, swap + if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) + std::swap(X, RHS); + + // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) + // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) + if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) { + if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { + Value *NewAnd = Builder->CreateAnd(Z, *C1); + return Builder->CreateSub(RHS, NewAnd, "", IHasNUW, IHasNSW); + } + } + } + + return nullptr; } Instruction *InstCombiner::visitAdd(BinaryOperator &I) { @@ -1089,23 +1107,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); - - { - Constant *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); - - // X*C1 + X*C2 --> X * (C1+C2) - Constant *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } - - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); - } + if (Value *V = checkForNegativeOperand(I, Builder)) + return ReplaceInstUsesWith(I, V); // A+B --> A|B iff A and B have no bits set in common. 
if (IntegerType *IT = dyn_cast(I.getType())) { @@ -1593,16 +1596,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } } - Constant *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - Constant *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". if (DL) { diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4f5d65ab785f..b23a606e0889 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1996,29 +1996,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { C1 = dyn_cast(C); C2 = dyn_cast(D); if (C1 && C2) { // (A & C1)|(B & C2) - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1->getValue() == ~C2->getValue()) { - if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) - return ReplaceInstUsesWith(I, A); - } - // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. 
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) - return ReplaceInstUsesWith(I, B); - } - } - if ((C1->getValue() & C2->getValue()) == 0) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d4bdd75fa82a..ff7456415814 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -922,6 +922,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::AMDGPU_rcp: { + if (const ConstantFP *C = dyn_cast(II->getArgOperand(0))) { + const APFloat &ArgVal = C->getValueAPF(); + APFloat Val(ArgVal.getSemantics(), 1.0); + APFloat::opStatus Status = Val.divide(ArgVal, + APFloat::rmNearestTiesToEven); + // Only do this if it was exact and therefore not dependent on the + // rounding mode. + if (Status == APFloat::opOK) + return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); + } + + break; + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 9996ebc2e744..497c0b49ab3f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -203,8 +203,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *X; Constant *C1; if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) { - Value *Add = Builder->CreateMul(X, Op1); - return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, Op1)); + Value *Mul = Builder->CreateMul(C1, Op1); + // Only go forward with the transform if C1*CI simplifies to a tidier + // constant. 
+ if (!match(Mul, m_Mul(m_Value(), m_Value()))) + return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul); } } } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index cc6665c947d7..2495747da5fe 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -789,11 +789,6 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the left shift is just shifting out partial signbits, delete the - // extension. - if (cast(Op0)->hasNoSignedWrap()) - return ReplaceInstUsesWith(I, X); - // If the input is an extension from the shifted amount value, e.g. // %x = zext i8 %A to i32 // %y = shl i32 %x, 24 diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 991ad796a7e3..88d7a0d59eac 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -395,6 +395,127 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, return false; } +/// This function returns identity value for given opcode, which can be used to +/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). +static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) { + if (isa(V)) + return nullptr; + + if (OpCode == Instruction::Mul) + return ConstantInt::get(V->getType(), 1); + + // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc. + + return nullptr; +} + +/// This function factors binary ops which can be combined using distributive +/// laws. This also factor SHL as MUL e.g. SHL(X, 2) ==> MUL(X, 4). 
+Instruction::BinaryOps getBinOpsForFactorization(BinaryOperator *Op, + Value *&LHS, Value *&RHS) { + if (!Op) + return Instruction::BinaryOpsEnd; + + if (Op->getOpcode() == Instruction::Shl) { + if (Constant *CST = dyn_cast(Op->getOperand(1))) { + // The multiplier is really 1 << CST. + RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST); + LHS = Op->getOperand(0); + return Instruction::Mul; + } + } + + // TODO: We can add other conversions e.g. shr => div etc. + + LHS = Op->getOperand(0); + RHS = Op->getOperand(1); + return Op->getOpcode(); +} + +/// This tries to simplify binary operations by factorizing out common terms +/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). +static Value *tryFactorization(InstCombiner::BuilderTy *Builder, + const DataLayout *DL, BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, Value *A, + Value *B, Value *C, Value *D) { + + // If any of A, B, C, D are null, we can not factor I, return early. + // Checking A and C should be enough. + if (!A || !C || !B || !D) + return nullptr; + + Value *SimplifiedInst = nullptr; + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. 
+ if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V); + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? + if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + } + } + + if (SimplifiedInst) { + ++NumFactor; + SimplifiedInst->takeName(&I); + + // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag. + // TODO: Check for NUW. 
+ if (BinaryOperator *BO = dyn_cast(SimplifiedInst)) { + if (isa(SimplifiedInst)) { + bool HasNSW = false; + if (isa(&I)) + HasNSW = I.hasNoSignedWrap(); + + if (BinaryOperator *Op0 = dyn_cast(LHS)) + if (isa(Op0)) + HasNSW &= Op0->hasNoSignedWrap(); + + if (BinaryOperator *Op1 = dyn_cast(RHS)) + if (isa(Op1)) + HasNSW &= Op1->hasNoSignedWrap(); + BO->setHasNoSignedWrap(HasNSW); + } + } + } + return SimplifiedInst; +} + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations /// which some other binary operation distributes over either by factorizing /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this @@ -404,65 +525,33 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); - Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op // Factorization. - if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { - // The instruction has the form "(A op' B) op (C op' D)". Try to factorize - // a common term. - Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); - Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); - Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; + Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B); + Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D); + + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + if (LHSOpcode == RHSOpcode) { + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + return V; + } - // Does "X op' Y" always equal "Y op' X"? - bool InnerCommutative = Instruction::isCommutative(InnerOpcode); - - // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? 
- if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) - // Does the instruction have the form "(A op' B) op (A op' D)" or, in the - // commutative case, "(A op' B) op (C op' A)"? - if (A == C || (InnerCommutative && A == D)) { - if (A != C) - std::swap(C, D); - // Consider forming "A op' (B op D)". - // If "B op D" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); - // If "B op D" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, A, V); - V->takeName(&I); - return V; - } - } + // The instruction has the form "(A op' B) op (C)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, + getIdentityValue(LHSOpcode, RHS))) + return V; - // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? - if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) - // Does the instruction have the form "(A op' B) op (C op' B)" or, in the - // commutative case, "(A op' B) op (B op' D)"? - if (B == D || (InnerCommutative && B == C)) { - if (B != D) - std::swap(C, D); - // Consider forming "(A op C) op' B". - // If "A op C" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); - // If "A op C" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, V, B); - V->takeName(&I); - return V; - } - } - } + // The instruction has the form "(B) op (C op' D)". Try to factorize common + // term. 
+ if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, + getIdentityValue(RHSOpcode, LHS), C, D)) + return V; // Expansion. + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out // to "(A op C) op' (B op C)" results in simplifications. diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e890943c6a84..75c56c2d4300 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1397,13 +1397,61 @@ struct MemorySanitizerVisitor : public InstVisitor { SC.Done(&I); } + // \brief Handle multiplication by constant. + // + // Handle a special case of multiplication by constant that may have one or + // more zeros in the lower bits. This makes corresponding number of lower bits + // of the result zero as well. We model it by shifting the other operand + // shadow left by the required number of bits. Effectively, we transform + // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B). + // We use multiplication by 2**N instead of shift to cover the case of + // multiplication by 0, which may occur in some elements of a vector operand. 
+ void handleMulByConstant(BinaryOperator &I, Constant *ConstArg, + Value *OtherArg) { + Constant *ShadowMul; + Type *Ty = ConstArg->getType(); + if (Ty->isVectorTy()) { + unsigned NumElements = Ty->getVectorNumElements(); + Type *EltTy = Ty->getSequentialElementType(); + SmallVector Elements; + for (unsigned Idx = 0; Idx < NumElements; ++Idx) { + ConstantInt *Elt = + dyn_cast(ConstArg->getAggregateElement(Idx)); + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + Elements.push_back(ConstantInt::get(EltTy, V2)); + } + ShadowMul = ConstantVector::get(Elements); + } else { + ConstantInt *Elt = dyn_cast(ConstArg); + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + ShadowMul = ConstantInt::get(Elt->getType(), V2); + } + + IRBuilder<> IRB(&I); + setShadow(&I, + IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst")); + setOrigin(&I, getOrigin(OtherArg)); + } + + void visitMul(BinaryOperator &I) { + Constant *constOp0 = dyn_cast(I.getOperand(0)); + Constant *constOp1 = dyn_cast(I.getOperand(1)); + if (constOp0 && !constOp1) + handleMulByConstant(I, constOp0, I.getOperand(1)); + else if (constOp1 && !constOp0) + handleMulByConstant(I, constOp1, I.getOperand(0)); + else + handleShadowOr(I); + } + void visitFAdd(BinaryOperator &I) { handleShadowOr(I); } void visitFSub(BinaryOperator &I) { handleShadowOr(I); } void visitFMul(BinaryOperator &I) { handleShadowOr(I); } void visitAdd(BinaryOperator &I) { handleShadowOr(I); } void visitSub(BinaryOperator &I) { handleShadowOr(I); } void visitXor(BinaryOperator &I) { handleShadowOr(I); } - void visitMul(BinaryOperator &I) { handleShadowOr(I); } void handleDiv(Instruction &I) { IRBuilder<> IRB(&I); @@ -1970,10 +2018,10 @@ struct MemorySanitizerVisitor : public InstVisitor { } } - // \brief Instrument vector shift instrinsic. + // \brief Instrument vector pack instrinsic. 
// // This function instruments intrinsics like x86_mmx_packsswb, that - packs elements of 2 input vectors into half as much bits with saturation. + packs elements of 2 input vectors into half as many bits with saturation. // Shadow is propagated with the signed variant of the same intrinsic applied // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer). // EltSizeInBits is used only for x86mmx arguments. @@ -2012,6 +2060,40 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } + + // \brief Instrument sum-of-absolute-differences intrinsic. + void handleVectorSadIntrinsic(IntrinsicInst &I) { + const unsigned SignificantBitsPerResultElement = 16; + bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); + Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType(); + unsigned ZeroBitsPerResultElement = + ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement; + + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + S = IRB.CreateBitCast(S, ResTy); + S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), + ResTy); + S = IRB.CreateLShr(S, ZeroBitsPerResultElement); + S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + + // \brief Instrument multiply-add intrinsic. + void handleVectorPmaddIntrinsic(IntrinsicInst &I, + unsigned EltSizeInBits = 0) { + bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); + Type *ResTy = isX86_MMX ?
getMMXVectorTy(EltSizeInBits * 2) : I.getType(); + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + S = IRB.CreateBitCast(S, ResTy); + S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), + ResTy); + S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: @@ -2148,6 +2230,27 @@ struct MemorySanitizerVisitor : public InstVisitor { handleVectorPackIntrinsic(I, 32); break; + case llvm::Intrinsic::x86_mmx_psad_bw: + case llvm::Intrinsic::x86_sse2_psad_bw: + case llvm::Intrinsic::x86_avx2_psad_bw: + handleVectorSadIntrinsic(I); + break; + + case llvm::Intrinsic::x86_sse2_pmadd_wd: + case llvm::Intrinsic::x86_avx2_pmadd_wd: + case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw_128: + case llvm::Intrinsic::x86_avx2_pmadd_ub_sw: + handleVectorPmaddIntrinsic(I); + break; + + case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw: + handleVectorPmaddIntrinsic(I, 8); + break; + + case llvm::Intrinsic::x86_mmx_pmadd_wd: + handleVectorPmaddIntrinsic(I, 16); + break; + default: if (!handleUnknownIntrinsic(I)) visitInstruction(I); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 230a381593e0..6e50d3331df6 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -158,6 +158,15 @@ bool JumpThreading::runOnFunction(Function &F) { TLI = &getAnalysis(); LVI = &getAnalysis(); + // Remove unreachable blocks from function as they may result in an infinite + loop. We do threading if we found something profitable. Jump threading a + branch can create other opportunities. If these opportunities form a cycle + i.e. if any jump threading is undoing previous threading in the path, then + we will loop forever. We take care of this issue by not jump threading for + back edges.
This works for normal cases but not for unreachable blocks as + they may have a cycle with no back edge. + removeUnreachableBlocks(F); + FindLoopHeaders(F); bool Changed, EverChanged = false; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e155daf6fcce..ff2f2a036225 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3313,6 +3313,10 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { static bool ValidLookupTableConstant(Constant *C) { if (ConstantExpr *CE = dyn_cast(C)) return CE->isGEPWithNoNotionalOverIndexing(); + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; return isa(C) || isa(C) || diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index f18202be167e..53a43d9851e9 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -149,6 +149,48 @@ static bool isSplat(ArrayRef VL) { return true; } +///\returns Opcode that can be clubbed with \p Op to create an alternate +/// sequence which can later be merged as a ShuffleVector instruction. +static unsigned getAltOpcode(unsigned Op) { + switch (Op) { + case Instruction::FAdd: + return Instruction::FSub; + case Instruction::FSub: + return Instruction::FAdd; + case Instruction::Add: + return Instruction::Sub; + case Instruction::Sub: + return Instruction::Add; + default: + return 0; + } +} + +///\returns bool representing if Opcode \p Op can be part +/// of an alternate sequence which can later be merged as +/// a ShuffleVector instruction. +static bool canCombineAsAltInst(unsigned Op) { + if (Op == Instruction::FAdd || Op == Instruction::FSub || + Op == Instruction::Sub || Op == Instruction::Add) + return true; + return false; +} + +/// \returns ShuffleVector instruction if instructions in \p VL have +/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence. +/// (i.e. 
e.g. opcodes of fadd,fsub,fadd,fsub...) +static unsigned isAltInst(ArrayRef VL) { + Instruction *I0 = dyn_cast(VL[0]); + unsigned Opcode = I0->getOpcode(); + unsigned AltOpcode = getAltOpcode(Opcode); + for (int i = 1, e = VL.size(); i < e; i++) { + Instruction *I = dyn_cast(VL[i]); + if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode)) + return 0; + } + return Instruction::ShuffleVector; +} + /// \returns The opcode if all of the Instructions in \p VL have the same /// opcode, or zero. static unsigned getSameOpcode(ArrayRef VL) { @@ -158,8 +200,11 @@ static unsigned getSameOpcode(ArrayRef VL) { unsigned Opcode = I0->getOpcode(); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast(VL[i]); - if (!I || Opcode != I->getOpcode()) + if (!I || Opcode != I->getOpcode()) { + if (canCombineAsAltInst(Opcode) && i == 1) + return isAltInst(VL); return 0; + } } return Opcode; } @@ -377,6 +422,7 @@ class BoUpSLP { /// \brief Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); + private: struct TreeEntry; @@ -594,6 +640,7 @@ void BoUpSLP::buildTree(ArrayRef Roots, void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { bool SameTy = getSameType(VL); (void)SameTy; + bool isAltShuffle = false; assert(SameTy && "Invalid types!"); if (Depth == RecursionMaxDepth) { @@ -615,10 +662,19 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { newTreeEntry(VL, false); return; } + unsigned Opcode = getSameOpcode(VL); + + // Check that this shuffle vector refers to the alternate + // sequence of opcodes. + if (Opcode == Instruction::ShuffleVector) { + Instruction *I0 = dyn_cast(VL[0]); + unsigned Op = I0->getOpcode(); + if (Op != Instruction::ShuffleVector) + isAltShuffle = true; + } // If all of the operands are identical or constant we have a simple solution. 
- if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || - !getSameOpcode(VL)) { + if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) { DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); newTreeEntry(VL, false); return; @@ -754,8 +810,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); - unsigned Opcode = getSameOpcode(VL); - // Check if it is safe to sink the loads or the stores. if (Opcode == Instruction::Load || Opcode == Instruction::Store) { Instruction *Last = getLastInstruction(VL); @@ -1057,6 +1111,26 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } return; } + case Instruction::ShuffleVector: { + // If this is not an alternate sequence of opcode like add-sub + // then do not vectorize this instruction. + if (!isAltShuffle) { + newTreeEntry(VL, false); + DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); + return; + } + newTreeEntry(VL, true); + DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. 
+ for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast(VL[j])->getOperand(i)); + + buildTree_rec(Operands, Depth + 1); + } + return; + } default: newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); @@ -1080,11 +1154,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } return getGatherCost(E->Scalars); } - - assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) && - "Invalid VL"); + unsigned Opcode = getSameOpcode(VL); + assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL"); Instruction *VL0 = cast(VL[0]); - unsigned Opcode = VL0->getOpcode(); switch (Opcode) { case Instruction::PHI: { return 0; @@ -1242,6 +1314,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return VecCallCost - ScalarCallCost; } + case Instruction::ShuffleVector: { + TargetTransformInfo::OperandValueKind Op1VK = + TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Op2VK = + TargetTransformInfo::OK_AnyValue; + int ScalarCost = 0; + int VecCost = 0; + for (unsigned i = 0; i < VL.size(); ++i) { + Instruction *I = cast(VL[i]); + if (!I) + break; + ScalarCost += + TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); + } + // VecCost is equal to sum of the cost of creating 2 vectors + // and the cost of creating shuffle. 
+ Instruction *I0 = cast(VL[0]); + VecCost = + TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); + Instruction *I1 = cast(VL[1]); + VecCost += + TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); + VecCost += + TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); + return VecCost - ScalarCost; + } default: llvm_unreachable("Unknown instruction"); } @@ -1522,9 +1620,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); return Gather(E->Scalars, VecTy); } - - unsigned Opcode = VL0->getOpcode(); - assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode"); + unsigned Opcode = getSameOpcode(E->Scalars); switch (Opcode) { case Instruction::PHI: { @@ -1797,6 +1893,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = V; return V; } + case Instruction::ShuffleVector: { + ValueList LHSVL, RHSVL; + for (int i = 0, e = E->Scalars.size(); i < e; ++i) { + LHSVL.push_back(cast(E->Scalars[i])->getOperand(0)); + RHSVL.push_back(cast(E->Scalars[i])->getOperand(1)); + } + setInsertPointAfterBundle(E->Scalars); + + Value *LHS = vectorizeTree(LHSVL); + Value *RHS = vectorizeTree(RHSVL); + + if (Value *V = alreadyVectorized(E->Scalars)) + return V; + + // Create a vector of LHS op1 RHS + BinaryOperator *BinOp0 = cast(VL0); + Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS); + + // Create a vector of LHS op2 RHS + Instruction *VL1 = cast(E->Scalars[1]); + BinaryOperator *BinOp1 = cast(VL1); + Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); + + // Create appropriate shuffle to take alternative operations from + // the vector. 
+ std::vector Mask(E->Scalars.size()); + unsigned e = E->Scalars.size(); + for (unsigned i = 0; i < e; ++i) { + if (i & 1) + Mask[i] = Builder.getInt32(e + i); + else + Mask[i] = Builder.getInt32(i); + } + + Value *ShuffleMask = ConstantVector::get(Mask); + + Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); + E->VectorizedValue = V; + if (Instruction *I = dyn_cast(V)) + return propagateMetadata(I, E->Scalars); + + return V; + } default: llvm_unreachable("unknown inst"); } @@ -1865,7 +2004,6 @@ Value *BoUpSLP::vectorizeTree() { // For each lane: for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; - // No need to handle users of gathered values. if (Entry->NeedToGather) continue; @@ -2049,7 +2187,6 @@ struct SLPVectorizer : public FunctionPass { for (po_iterator it = po_begin(&F.getEntryBlock()), e = po_end(&F.getEntryBlock()); it != e; ++it) { BasicBlock *BB = *it; - // Vectorize trees that end at stores. if (unsigned count = collectStores(BB, R)) { (void)count; diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll new file mode 100644 index 000000000000..7123e5e0b235 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -march=arm64 | FileCheck %s + + +define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) { +; CHECK: fptosi_v4f64_to_v4i16 +; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d +; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d +; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d +; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d +; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h + %tmp1 = load <4 x double>* %ptr + %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16> + ret <4 x i16> %tmp2 +} + +define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) { +; CHECK: fptosi_v4f64_to_v4i8 +; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d +; CHECK-DAG: fcvtzs 
v[[CONV2:[0-9]+]].2d, v2.2d +; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d +; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d +; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d +; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d +; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d +; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d +; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h +; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h +; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b + %tmp1 = load <8 x double>* %ptr + %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8> + ret <8 x i8> %tmp2 +} + diff --git a/test/CodeGen/AArch64/branch-relax-asm.ll b/test/CodeGen/AArch64/branch-relax-asm.ll new file mode 100644 index 000000000000..7409c84e6180 --- /dev/null +++ b/test/CodeGen/AArch64/branch-relax-asm.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-apple-ios7.0 -disable-block-placement -aarch64-tbz-offset-bits=4 -o - %s | FileCheck %s +define i32 @test_asm_length(i32 %in) { +; CHECK-LABEL: test_asm_length: + + ; It would be more natural to use just one "tbnz %false" here, but if the + ; number of instructions in the asm is counted reasonably, that block is out + ; of the limited range we gave tbz. So branch relaxation has to invert the + ; condition. 
+; CHECK: tbz w0, #0, [[TRUE:LBB[0-9]+_[0-9]+]] +; CHECK: b [[FALSE:LBB[0-9]+_[0-9]+]] + +; CHECK: [[TRUE]]: +; CHECK: orr w0, wzr, #0x4 +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: nop +; CHECK: ret + +; CHECK: [[FALSE]]: +; CHECK: ret + + %val = and i32 %in, 1 + %tst = icmp eq i32 %val, 0 + br i1 %tst, label %true, label %false + +true: + call void asm sideeffect "nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop\0A\09nop", ""() + ret i32 4 + +false: + ret i32 0 +} diff --git a/test/CodeGen/ARM/metadata-default.ll b/test/CodeGen/ARM/metadata-default.ll new file mode 100644 index 000000000000..f6a3fe289cc1 --- /dev/null +++ b/test/CodeGen/ARM/metadata-default.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 4} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4} + +; CHECK: .eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 2 @ Tag_ABI_enum_size diff --git a/test/CodeGen/ARM/metadata-short-enums.ll b/test/CodeGen/ARM/metadata-short-enums.ll new file mode 100644 index 000000000000..bccd3327e5b5 --- /dev/null +++ b/test/CodeGen/ARM/metadata-short-enums.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 4} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 1} + +; CHECK: .eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 1 @ Tag_ABI_enum_size diff --git a/test/CodeGen/ARM/metadata-short-wchar.ll b/test/CodeGen/ARM/metadata-short-wchar.ll new file mode 100644 index 000000000000..6de9bf174317 --- 
/dev/null +++ b/test/CodeGen/ARM/metadata-short-wchar.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7--none-eabi" + +define i32 @f(i64 %z) { + ret i32 0 +} + +!llvm.module.flags = !{!0, !1} + +!0 = metadata !{i32 1, metadata !"wchar_size", i32 2} +!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4} + +; CHECK: .eabi_attribute 18, 2 @ Tag_ABI_PCS_wchar_t +; CHECK: .eabi_attribute 26, 2 @ Tag_ABI_enum_size diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll index 20d46708c550..066d42cc302d 100644 --- a/test/CodeGen/Mips/atomic.ll +++ b/test/CodeGen/Mips/atomic.ll @@ -344,11 +344,11 @@ entry: ; ALL-LABEL: CheckSync: -; ALL: sync 0 +; ALL: sync ; ALL: ll ; ALL: sc ; ALL: beq -; ALL: sync 0 +; ALL: sync } ; make sure that this assertion in diff --git a/test/CodeGen/Mips/msa/special.ll b/test/CodeGen/Mips/msa/special.ll index f65a14f7bb1f..b9badf5dc582 100644 --- a/test/CodeGen/Mips/msa/special.ll +++ b/test/CodeGen/Mips/msa/special.ll @@ -4,6 +4,10 @@ ; RUN: FileCheck %s --check-prefix=MIPS32 ; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \ ; RUN: FileCheck %s --check-prefix=MIPS64 +; RUN: llc -march=mips -mcpu=mips32r6 -mattr=+msa < %s | \ +; RUN: FileCheck %s --check-prefix=MIPS32 +; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=+msa < %s | \ +; RUN: FileCheck %s --check-prefix=MIPS64 define i32 @llvm_mips_lsa_test(i32 %a, i32 %b) nounwind { entry: diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll index 1f3bb7111efd..31794be25beb 100644 --- a/test/CodeGen/PowerPC/ppc64-calls.ll +++ b/test/CodeGen/PowerPC/ppc64-calls.ll @@ -42,12 +42,18 @@ define void @test_indirect(void ()* nocapture %fp) nounwind { ret void } -; Absolute vales should be have the TOC restore 'nop' +; Absolute values must use the regular indirect call sequence +; The main purpose of this test is to ensure that BLA is not +; used on 
64-bit SVR4 (as e.g. on Darwin). define void @test_abs() nounwind { ; CHECK-LABEL: test_abs: tail call void inttoptr (i64 1024 to void ()*)() nounwind -; CHECK: bla 1024 -; CHECK-NEXT: nop +; CHECK: ld [[FP:[0-9]+]], 1024(0) +; CHECK: ld 11, 1040(0) +; CHECK: ld 2, 1032(0) +; CHECK-NEXT: mtctr [[FP]] +; CHECK-NEXT: bctrl +; CHECK-NEXT: ld 2, 40(1) ret void } diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll index 4bce8c80fc6a..2733089fcb10 100644 --- a/test/CodeGen/PowerPC/vec_cmp.ll +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -36,7 +36,7 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone { ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v16i8 since it is a altivec native type +; Additional tests for v16i8 since it is a altivec native type define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone { %cmp = icmp eq <16 x i8> %x, %y @@ -165,7 +165,7 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v8i16 since it is an altivec native type +; Additional tests for v8i16 since it is an altivec native type define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone { entry: @@ -298,7 +298,7 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v4si32 since it is an altivec native type +; Additional tests for v4si32 since it is an altivec native type define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone { entry: @@ -449,7 +449,7 @@ entry: ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -; Adicional tests for v4f32 since it is a altivec native type +; Additional tests for v4f32 since it is a altivec native type define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone { entry: diff --git 
a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll index c2362da15cde..3230353c36c7 100644 --- a/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/test/CodeGen/R600/array-ptr-calc-i32.ll @@ -10,7 +10,12 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; SI-LABEL: @test_private_array_ptr_calc: ; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]] -; SI: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]] +; +; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this +; alloca to a vector. It currently fails because it does not know how +; to interpret: +; getelementptr [4 x i32]* %alloca, i32 1, i32 %b +; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]] define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { %alloca = alloca [4 x i32], i32 4, align 16 %tid = call i32 @llvm.SI.tid() readnone diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll index 6b683769fe06..511e8ef62951 100644 --- a/test/CodeGen/R600/big_alu.ll +++ b/test/CodeGen/R600/big_alu.ll @@ -101,7 +101,7 @@ IF137: ; preds = %main_body %88 = insertelement <4 x float> %87, float %32, i32 2 %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89) - %91 = call float @llvm.AMDGPU.rsq(float %90) + %91 = call float @llvm.AMDGPU.rsq.f32(float %90) %92 = fmul float %30, %91 %93 = fmul float %31, %91 %94 = fmul float %32, %91 @@ -344,7 +344,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %325 = insertelement <4 x float> %324, float %318, i32 2 %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3 %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326) - %328 = call float @llvm.AMDGPU.rsq(float %327) + %328 = call float @llvm.AMDGPU.rsq.f32(float %327) %329 = fmul float %314, %328 %330 = fmul float %316, %328 %331 = fmul float %318, %328 @@ -377,7 +377,7 @@ ENDIF136: ; preds = %main_body, 
%ENDIF15 %358 = insertelement <4 x float> %357, float %45, i32 2 %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359) - %361 = call float @llvm.AMDGPU.rsq(float %360) + %361 = call float @llvm.AMDGPU.rsq.f32(float %360) %362 = fmul float %45, %361 %363 = call float @fabs(float %362) %364 = fmul float %176, 0x3FECCCCCC0000000 @@ -403,7 +403,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %384 = insertelement <4 x float> %383, float %45, i32 2 %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385) - %387 = call float @llvm.AMDGPU.rsq(float %386) + %387 = call float @llvm.AMDGPU.rsq.f32(float %386) %388 = fmul float %45, %387 %389 = call float @fabs(float %388) %390 = fmul float %176, 0x3FF51EB860000000 @@ -1041,7 +1041,7 @@ IF179: ; preds = %ENDIF175 %896 = insertelement <4 x float> %895, float %45, i32 2 %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3 %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897) - %899 = call float @llvm.AMDGPU.rsq(float %898) + %899 = call float @llvm.AMDGPU.rsq.f32(float %898) %900 = fmul float %45, %899 %901 = call float @fabs(float %900) %902 = fmul float %176, 0x3FECCCCCC0000000 @@ -1150,7 +1150,7 @@ ENDIF178: ; preds = %ENDIF175, %IF179 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1 diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll new file mode 100644 index 000000000000..15b5188efd60 --- /dev/null +++ b/test/CodeGen/R600/ctlz_zero_undef.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 
@llvm.ctlz.i32(i32, i1) nounwind readnone +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone + +; FUNC-LABEL: @s_ctlz_zero_undef_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_v2i32: +; SI: BUFFER_LOAD_DWORDX2 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <2 x i32> addrspace(1)* %valptr, align 8 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone + store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_v4i32: +; SI: BUFFER_LOAD_DWORDX4 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: BUFFER_STORE_DWORDX4 +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <4 x i32> 
addrspace(1)* %valptr, align 16 + %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone + store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll new file mode 100644 index 000000000000..cf44f8e60d01 --- /dev/null +++ b/test/CodeGen/R600/cttz_zero_undef.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone + +; FUNC-LABEL: @s_cttz_zero_undef_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + store i32 %cttz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + store i32 %cttz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_v2i32: +; SI: BUFFER_LOAD_DWORDX2 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <2 x i32> addrspace(1)* %valptr, 
align 8 + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone + store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_v4i32: +; SI: BUFFER_LOAD_DWORDX4 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: BUFFER_STORE_DWORDX4 +; SI: S_ENDPGM +define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <4 x i32> addrspace(1)* %valptr, align 16 + %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone + store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll index b8b945f46ffe..458363adc1e3 100644 --- a/test/CodeGen/R600/fceil.ll +++ b/test/CodeGen/R600/fceil.ll @@ -1,84 +1,131 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -declare double @llvm.ceil.f64(double) nounwind readnone -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone -declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone -declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone -declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone -declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone +declare float @llvm.ceil.f32(float) nounwind readnone +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone +declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone -; 
CI-LABEL: @fceil_f64: -; CI: V_CEIL_F64_e32 -define void @fceil_f64(double addrspace(1)* %out, double %x) { - %y = call double @llvm.ceil.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out +; FUNC-LABEL: @fceil_f32: +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_f32(float addrspace(1)* %out, float %x) { + %y = call float @llvm.ceil.f32(float %x) nounwind readnone + store float %y, float addrspace(1)* %out ret void } -; CI-LABEL: @fceil_v2f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { - %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone - store <2 x double> %y, <2 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v2f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}} +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { + %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone + store <2 x float> %y, <2 x float> addrspace(1)* %out ret void } -; FIXME-CI-LABEL: @fceil_v3f64: -; FIXME-CI: V_CEIL_F64_e32 -; FIXME-CI: V_CEIL_F64_e32 -; FIXME-CI: V_CEIL_F64_e32 -; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { -; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out -; ret void -; } +; FUNC-LABEL: @fceil_v3f32: +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? 
*}}[[RESULT2]] +define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { + %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone + store <3 x float> %y, <3 x float> addrspace(1)* %out + ret void +} -; CI-LABEL: @fceil_v4f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { - %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone - store <4 x double> %y, <4 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v4f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}} +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { + %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone + store <4 x float> %y, <4 x float> addrspace(1)* %out ret void } -; CI-LABEL: @fceil_v8f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { - %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone - store <8 x double> %y, <8 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v8f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? 
*}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { + %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone + store <8 x float> %y, <8 x float> addrspace(1)* %out ret void } -; CI-LABEL: @fceil_v16f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { - %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone - store <16 x double> %y, <16 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v16f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? 
*}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +define void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { + %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone + store <16 x float> %y, <16 x float> addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll new file mode 100644 index 000000000000..b42aefa17328 --- /dev/null +++ b/test/CodeGen/R600/fceil64.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare double @llvm.ceil.f64(double) nounwind readnone +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone + +; FUNC-LABEL: @fceil_f64: +; CI: V_CEIL_F64_e32 +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_F64 +; SI: CNDMASK_B32 +; SI: CMP_NE_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: V_ADD_F64 +define void @fceil_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.ceil.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v2f64: +; CI: V_CEIL_F64_e32 +; CI: 
V_CEIL_F64_e32 +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-FUNC-LABEL: @fceil_v3f64: +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone +; store <3 x double> %y, <3 x double> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: @fceil_v4f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v8f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v16f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} diff 
--git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll index 51d2b8961504..31c6116988e6 100644 --- a/test/CodeGen/R600/ffloor.ll +++ b/test/CodeGen/R600/ffloor.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.floor.f64(double) nounwind readnone declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone @@ -7,15 +8,34 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone -; CI-LABEL: @ffloor_f64: +; FUNC-LABEL: @ffloor_f64: ; CI: V_FLOOR_F64_e32 + +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_LT_F64 +; SI: CNDMASK_B32 +; SI: CMP_NE_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: V_ADD_F64 define void @ffloor_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.floor.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out ret void } -; CI-LABEL: @ffloor_v2f64: +; FUNC-LABEL: @ffloor_v2f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { @@ -24,7 +44,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ret void } -; FIXME-CI-LABEL: @ffloor_v3f64: +; FIXME-FUNC-LABEL: @ffloor_v3f64: ; FIXME-CI: V_FLOOR_F64_e32 ; FIXME-CI: V_FLOOR_F64_e32 ; FIXME-CI: V_FLOOR_F64_e32 @@ -34,7 +54,7 @@ define void @ffloor_v2f64(<2 x 
double> addrspace(1)* %out, <2 x double> %x) { ; ret void ; } -; CI-LABEL: @ffloor_v4f64: +; FUNC-LABEL: @ffloor_v4f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 @@ -45,7 +65,7 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { ret void } -; CI-LABEL: @ffloor_v8f64: +; FUNC-LABEL: @ffloor_v8f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 @@ -60,7 +80,7 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { ret void } -; CI-LABEL: @ffloor_v16f64: +; FUNC-LABEL: @ffloor_v16f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll new file mode 100644 index 000000000000..1c1d7315189f --- /dev/null +++ b/test/CodeGen/R600/fnearbyint.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s + +; This should have the exactly the same output as the test for rint, +; so no need to check anything. 
+ +declare float @llvm.nearbyint.f32(float) #0 +declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0 +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0 +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0 + + +define void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 { +entry: + %0 = call float @llvm.nearbyint.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +define void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { +entry: + %0 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +define void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 { +entry: + %0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +define void @nearbyint_f64(double addrspace(1)* %out, double %in) { +entry: + %0 = call double @llvm.nearbyint.f64(double %in) + store double %0, double addrspace(1)* %out + ret void +} +define void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +entry: + %0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in) + store <2 x double> %0, <2 x double> addrspace(1)* %out + ret void +} + +define void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +entry: + %0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %in) + store <4 x double> %0, <4 x double> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } diff --git a/test/CodeGen/R600/fp_to_sint_i64.ll b/test/CodeGen/R600/fp_to_sint_i64.ll new file mode 100644 index 000000000000..ec3e19804c57 --- /dev/null +++ b/test/CodeGen/R600/fp_to_sint_i64.ll @@ -0,0 +1,12 @@ +; FIXME: Merge into fp_to_sint.ll when EG/NI supports 64-bit types +; RUN: llc < %s 
-march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s + +; SI-LABEL: @fp_to_sint_i64 +; Check that the compiler doesn't crash with a "cannot select" error +; SI: S_ENDPGM +define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { +entry: + %0 = fptosi float %in to i64 + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll index 1445a20839ad..f5e5708f1b41 100644 --- a/test/CodeGen/R600/fsub64.ll +++ b/test/CodeGen/R600/fsub64.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s - -; CHECK: @fsub_f64 -; CHECK: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}, 0, 0, 0, 0, 2 +; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; SI-LABEL: @fsub_f64: +; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll index 6b235ffbd980..3cd1deb921fc 100644 --- a/test/CodeGen/R600/ftrunc.ll +++ b/test/CodeGen/R600/ftrunc.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.trunc.f64(double) nounwind readnone declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone @@ -7,15 +8,40 @@ declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone -; CI-LABEL: @ftrunc_f64: +; FUNC-LABEL: @v_ftrunc_f64: ; CI: V_TRUNC_F64_e32 +; SI: V_BFE_I32 
{{v[0-9]+}}, {{v[0-9]+}}, 20, 11 +; SI: S_ENDPGM +define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { + %x = load double addrspace(1)* %in, align 8 + %y = call double @llvm.trunc.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @ftrunc_f64: +; CI: V_TRUNC_F64_e32 + +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: S_ENDPGM define void @ftrunc_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.trunc.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out ret void } -; CI-LABEL: @ftrunc_v2f64: +; FUNC-LABEL: @ftrunc_v2f64: ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { @@ -24,7 +50,7 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ret void } -; FIXME-CI-LABEL: @ftrunc_v3f64: +; FIXME-FUNC-LABEL: @ftrunc_v3f64: ; FIXME-CI: V_TRUNC_F64_e32 ; FIXME-CI: V_TRUNC_F64_e32 ; FIXME-CI: V_TRUNC_F64_e32 @@ -34,7 +60,7 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ; ret void ; } -; CI-LABEL: @ftrunc_v4f64: +; FUNC-LABEL: @ftrunc_v4f64: ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 @@ -45,7 +71,7 @@ define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { ret void } -; CI-LABEL: @ftrunc_v8f64: +; FUNC-LABEL: @ftrunc_v8f64: ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 @@ -60,7 +86,7 @@ define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { ret void } -; CI-LABEL: @ftrunc_v16f64: +; FUNC-LABEL: @ftrunc_v16f64: ; CI: V_TRUNC_F64_e32 ; CI: 
V_TRUNC_F64_e32 ; CI: V_TRUNC_F64_e32 diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll index 4d1f7347ecc9..b127b7ede2e8 100644 --- a/test/CodeGen/R600/indirect-private-64.ll +++ b/test/CodeGen/R600/indirect-private-64.ll @@ -3,10 +3,8 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind ; SI-LABEL: @private_access_f64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind { %val = load double addrspace(1)* %in, align 8 %array = alloca double, i32 16, align 8 @@ -19,14 +17,10 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double } ; SI-LABEL: @private_access_v2f64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 +; SI: DS_READ_B64 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 @@ -39,10 +33,8 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out } ; SI-LABEL: @private_access_i64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind { %val = load i64 addrspace(1)* %in, align 8 %array = alloca i64, i32 16, align 8 @@ -55,14 +47,10 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs } ; 
SI-LABEL: @private_access_v2i64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 +; SI: DS_READ_B64 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll index dd9b6775c082..d8be6d40f310 100644 --- a/test/CodeGen/R600/large-alloca.ll +++ b/test/CodeGen/R600/large-alloca.ll @@ -2,10 +2,13 @@ ; REQUIRES: asserts ; RUN: llc -march=r600 -mcpu=SI < %s -define void @large_alloca(i32 addrspace(1)* %out, i32 %x) nounwind { - %large = alloca [256 x i32], align 4 - %gep = getelementptr [256 x i32]* %large, i32 0, i32 255 +define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32]* %large, i32 0, i32 8191 store i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32]* %large, i32 0, i32 %y + %0 = load i32* %gep1 + store i32 %0, i32 addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll new file mode 100644 index 000000000000..a0a47b7c4701 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone + +; Legacy name +declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone + +; FUNC-LABEL: @s_abs_i32 +; SI: S_SUB_I32 +; SI: S_MAX_I32 +; SI: S_ENDPGM + +; EG: SUB_INT +; 
EG: MAX_INT +define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind { + %abs = call i32 @llvm.AMDGPU.abs(i32 %src) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_abs_i32 +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @abs_i32_legacy_amdil +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll new file mode 100644 index 000000000000..68a5ad0649c2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone + +; FUNC-LABEL: @s_brev_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_brev_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void 
@v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll new file mode 100644 index 000000000000..d608953a0dd2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone +declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone + +; FUNC-LABEL: @clamp_0_1_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +; SI: S_ENDPGM + +; EG: MOV_SAT +define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDGPU.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll new file mode 100644 index 000000000000..c8c73573e073 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll @@ -0,0 +1,27 @@ +; RUN: llc 
-march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fixup_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fixup_f64: +; SI: V_DIV_FIXUP_F64 +define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll new file mode 100644 index 000000000000..4f1e827c2cbd --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fmas_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 
[[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fmas_f64: +; SI: V_DIV_FMAS_F64 +define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll new file mode 100644 index 000000000000..1bcbe2f859ed --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll @@ -0,0 +1,23 @@ +; XFAIL: * +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone +declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone + +; SI-LABEL @test_div_scale_f32: +define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { + %a = load float addrspace(1)* %aptr, align 4 + %b = load float addrspace(1)* %bptr, align 4 + %result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f64: +define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load double 
addrspace(1)* %bptr, align 8 + %result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll new file mode 100644 index 000000000000..72ec1c57571e --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone + +; Legacy name +declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone + +; FUNC-LABEL: @fract_f32 +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @fract_f32_legacy_amdil +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll new file mode 100644 index 000000000000..ca5260dc5bc8 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone +declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone + + +declare float @llvm.sqrt.f32(float) 
nounwind readnone +declare double @llvm.sqrt.f64(double) nounwind readnone + +; FUNC-LABEL: @rcp_f32 +; SI: V_RCP_F32_e32 +define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rcp_f64 +; SI: V_RCP_F64_e32 +define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rcp_pat_f32 +; SI: V_RCP_F32_e32 +define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = fdiv float 1.0, %src + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rcp_pat_f64 +; SI: V_RCP_F64_e32 +define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = fdiv double 1.0, %src + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f32 +; SI: V_RSQ_F32_e32 +define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone + %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f64 +; SI: V_RSQ_F64_e32 +define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone + %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll new file mode 100644 index 000000000000..1c736d447ea9 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=r600 
-mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone + +; SI-LABEL: @test_trig_preop_f64: +; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]] +; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load i32 addrspace(1)* %bptr, align 4 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @test_trig_preop_f64_imm_segment: +; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7 +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll new file mode 100644 index 000000000000..8402faaa4dca --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.gather4.ll @@ -0,0 +1,508 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @gather4_v2 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, 
i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl +;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l +;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, 
i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b +;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_v8 +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> 
undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_v2 +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_o +;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_o() #0 { +main_body: + %r = call <4 x float> 
@llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o_v8 +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define 
void @gather4_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o_v8 +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o_v8 +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, 
{{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_o +;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_o +;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c +;CHECK: IMAGE_GATHER4_C 
{{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_v8 +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} 
+ +;CHECK-LABEL: @gather4_c_l +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_v8 +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float 
%r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_v8 +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl +;CHECK: IMAGE_GATHER4_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz +;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + 
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c_o +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_o_v8 +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_o +;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement 
<4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_o +;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_o +;CHECK: IMAGE_GATHER4_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl_o +;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x 
float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o_v8 +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, 
i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, 
i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, 
i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll new file mode 100644 index 000000000000..a7a17ec3fffa --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.getlod.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @getlod +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v2 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v4 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + 
%r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + + +declare <4 x float> @llvm.SI.getlod.i32(i32, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll index a7a909addafb..3e2884b7ce02 100644 --- a/test/CodeGen/R600/llvm.rint.f64.ll +++ b/test/CodeGen/R600/llvm.rint.f64.ll @@ -1,30 +1,38 @@ ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; FUNC-LABEL: @f64 +; FUNC-LABEL: @rint_f64 ; CI: V_RNDNE_F64_e32 -define void @f64(double addrspace(1)* %out, double %in) { + +; SI-DAG: V_ADD_F64 +; SI-DAG: V_ADD_F64 +; SI-DAG V_CMP_GT_F64_e64 +; SI: V_CNDMASK_B32 +; SI: V_CNDMASK_B32 +; SI: S_ENDPGM +define void @rint_f64(double addrspace(1)* %out, double %in) { entry: %0 = call double @llvm.rint.f64(double %in) store double %0, double addrspace(1)* %out ret void } -; FUNC-LABEL: @v2f64 +; FUNC-LABEL: @rint_v2f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { entry: %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in) store <2 x double> %0, <2 x double> addrspace(1)* %out ret void } -; FUNC-LABEL: @v4f64 
+; FUNC-LABEL: @rint_v4f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { entry: %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in) store <4 x double> %0, <4 x double> addrspace(1)* %out diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll index 4afaf684bfce..8a269e0cb43a 100644 --- a/test/CodeGen/R600/parallelandifcollapse.ll +++ b/test/CodeGen/R600/parallelandifcollapse.ll @@ -7,6 +7,12 @@ ; CHECK: AND_INT ; CHECK-NEXT: AND_INT ; CHECK-NEXT: OR_INT + +; FIXME: For some reason having the allocas here allowed the flatten cfg pass +; to do its transformation, however now that we are using local memory for +; allocas, the transformation isn't happening. +; XFAIL: * + define void @_Z9chk1D_512v() #0 { entry: %a0 = alloca i32, align 4 diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll index b0db7cdd0671..feca688c30aa 100644 --- a/test/CodeGen/R600/parallelorifcollapse.ll +++ b/test/CodeGen/R600/parallelorifcollapse.ll @@ -3,6 +3,11 @@ ; ; CFG flattening should use parallel-or to generate branch conditions and ; then merge if-regions with the same bodies. + +; FIXME: For some reason having the allocas here allowed the flatten cfg pass +; to do its transformation, however now that we are using local memory for +; allocas, the transformation isn't happening.
+; XFAIL: * ; ; CHECK: OR_INT ; CHECK-NEXT: OR_INT diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index d3453f26aee6..c60c05975600 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,24 +1,17 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC -; This test checks that uses and defs of the AR register happen in the same -; instruction clause. - ; FUNC-LABEL: @mova_same_clause -; R600-CHECK: MOVA_INT -; R600-CHECK-NOT: ALU clause -; R600-CHECK: 0 + AR.x -; R600-CHECK: MOVA_INT -; R600-CHECK-NOT: ALU clause -; R600-CHECK: 0 + AR.x - -; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo -; SI-CHECK: V_MOVRELD -; SI-CHECK: S_CBRANCH -; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo -; SI-CHECK: V_MOVRELD -; SI-CHECK: S_CBRANCH +; R600-CHECK: LDS_WRITE +; R600-CHECK: LDS_WRITE +; R600-CHECK: LDS_READ +; R600-CHECK: LDS_READ + +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_READ_B32 +; SI-CHECK: DS_READ_B32 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -114,12 +107,8 @@ for.end: ; FUNC-LABEL: @short_array -; R600-CHECK: MOV {{\** *}}T{{[0-9]\.[XYZW]}}, literal -; R600-CHECK: 65536 -; R600-CHECK: * ; R600-CHECK: MOVA_INT -; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 0x10000 ; SI-CHECK: V_MOVRELS_B32_e32 define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: @@ -137,10 +126,7 @@ entry: ; FUNC-LABEL: @char_array -; R600-CHECK: OR_INT {{\** *}}T{{[0-9]\.[XYZW]}}, {{[PVT0-9]+\.[XYZW]}}, literal -; R600-CHECK: 256 -; R600-CHECK: * -; R600-CHECK-NEXT: MOVA_INT +; R600-CHECK: MOVA_INT ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100 ; SI-CHECK: V_MOVRELS_B32_e32 @@ -185,7 +171,9 @@ entry: ; Test that two stack objects are not stored 
in the same register ; The second stack object should be in T3.X ; FUNC-LABEL: @no_overlap -; R600-CHECK: MOV {{\** *}}T3.X +; R600-CHECK: MOV +; R600-CHECK: [[CHAN:[XYZW]]]+ +; R600-CHECK-NOT: [[CHAN]]+ ; SI-CHECK: V_MOV_B32_e32 v3 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) { entry: diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index f322bc71c6bd..55eb56d3fb1d 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -103,7 +103,7 @@ main_body: %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) %97 = call float @fabs(float %96) - %98 = call float @llvm.AMDGPU.rsq(float %97) + %98 = call float @llvm.AMDGPU.rsq.f32(float %97) %99 = fmul float %4, %98 %100 = fmul float %5, %98 %101 = fmul float %6, %98 @@ -225,7 +225,7 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare float @fabs(float) #2 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #1 diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index c581d86b99bd..c7d5bf90644e 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -70,7 +70,7 @@ main_body: %55 = fadd float %54, %53 %56 = fmul float %45, %45 %57 = fadd float %55, %56 - %58 = call float @llvm.AMDGPU.rsq(float %57) + %58 = call float @llvm.AMDGPU.rsq.f32(float %57) %59 = fmul float %43, %58 %60 = fmul float %44, %58 %61 = fmul float %45, %58 @@ -212,7 +212,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #3 +declare float @llvm.AMDGPU.rsq.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 diff --git
a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll index 4a6aab4a104a..43fab2a39dcc 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/R600/shl.ll @@ -39,5 +39,118 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -; XXX: Add SI test for i64 shl once i64 stores and i64 function arguments are -; supported. +;EG-CHECK: @shl_i64 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 + +;SI-CHECK: @shl_i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = shl i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK: @shl_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHR {{\*?
*}}[[COMPSHB]] +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @shl_v2i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = shl <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +;EG-CHECK: @shl_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? 
*}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @shl_v4i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = shl <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll index b34a757d9b65..53a096513bbc 100644 --- a/test/CodeGen/R600/si-sgpr-spill.ll 
+++ b/test/CodeGen/R600/si-sgpr-spill.ll @@ -203,7 +203,7 @@ main_body: %198 = fadd float %197, %196 %199 = fmul float %97, %97 %200 = fadd float %198, %199 - %201 = call float @llvm.AMDGPU.rsq(float %200) + %201 = call float @llvm.AMDGPU.rsq.f32(float %200) %202 = fmul float %95, %201 %203 = fmul float %96, %201 %204 = fmul float %202, %29 @@ -384,7 +384,7 @@ IF67: ; preds = %LOOP65 %355 = fadd float %354, %353 %356 = fmul float %352, %352 %357 = fadd float %355, %356 - %358 = call float @llvm.AMDGPU.rsq(float %357) + %358 = call float @llvm.AMDGPU.rsq.f32(float %357) %359 = fmul float %350, %358 %360 = fmul float %351, %358 %361 = fmul float %352, %358 @@ -512,7 +512,7 @@ IF67: ; preds = %LOOP65 %483 = fadd float %482, %481 %484 = fmul float %109, %109 %485 = fadd float %483, %484 - %486 = call float @llvm.AMDGPU.rsq(float %485) + %486 = call float @llvm.AMDGPU.rsq.f32(float %485) %487 = fmul float %107, %486 %488 = fmul float %108, %486 %489 = fmul float %109, %486 @@ -541,7 +541,7 @@ IF67: ; preds = %LOOP65 %512 = fadd float %511, %510 %513 = fmul float %97, %97 %514 = fadd float %512, %513 - %515 = call float @llvm.AMDGPU.rsq(float %514) + %515 = call float @llvm.AMDGPU.rsq.f32(float %514) %516 = fmul float %95, %515 %517 = fmul float %96, %515 %518 = fmul float %97, %515 @@ -658,7 +658,7 @@ declare i32 @llvm.SI.tid() #2 declare float @ceil(float) #3 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #2 +declare float @llvm.AMDGPU.rsq.f32(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1 @@ -887,7 +887,7 @@ main_body: %212 = fadd float %211, %210 %213 = fmul float %209, %209 %214 = fadd float %212, %213 - %215 = call float @llvm.AMDGPU.rsq(float %214) + %215 = call float @llvm.AMDGPU.rsq.f32(float %214) %216 = fmul float %205, %215 %217 = fmul float %207, %215 %218 = fmul float %209, %215 @@ -1123,7 +1123,7 @@ IF189: ; preds = %LOOP %434 = fsub float -0.000000e+00, 
%433 %435 = fadd float 0x3FF00068E0000000, %434 %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00) - %437 = call float @llvm.AMDGPU.rsq(float %436) + %437 = call float @llvm.AMDGPU.rsq.f32(float %436) %438 = fmul float %437, %436 %439 = fsub float -0.000000e+00, %436 %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00) @@ -1147,7 +1147,7 @@ IF189: ; preds = %LOOP %458 = fadd float %457, %456 %459 = fmul float %455, %455 %460 = fadd float %458, %459 - %461 = call float @llvm.AMDGPU.rsq(float %460) + %461 = call float @llvm.AMDGPU.rsq.f32(float %460) %462 = fmul float %451, %461 %463 = fmul float %453, %461 %464 = fmul float %455, %461 @@ -1257,7 +1257,7 @@ ENDIF197: ; preds = %IF189, %IF198 %559 = fadd float %558, %557 %560 = fmul float %556, %556 %561 = fadd float %559, %560 - %562 = call float @llvm.AMDGPU.rsq(float %561) + %562 = call float @llvm.AMDGPU.rsq.f32(float %561) %563 = fmul float %562, %561 %564 = fsub float -0.000000e+00, %561 %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00) diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll index d9f60ea1a4dc..dee432664e89 100644 --- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll +++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; XFAIL: * + ; 64-bit select was originally lowered with a build_pair, and this ; could be simplified to 1 cndmask instead of 2, but that broken when ; it started being implemented with a v2i32 build_vector and @@ -12,9 +14,10 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) { ret void } +; FIXME: Fix truncating store for local memory ; SI-LABEL: @trunc_load_alloca_i64: -; SI: V_MOVRELS_B32 -; SI-NOT: V_MOVRELS_B32 +; SI: DS_READ_B32 +; SI-NOT: DS_READ_B64 ; SI: 
S_ENDPGM define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) { %idx = add i32 %a, %b diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index fe9df104ae11..9eb3dc544074 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -52,3 +52,133 @@ entry: ret void } +;EG-CHECK-LABEL: @ashr_i64_2 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-CHECK-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} + +;SI-CHECK-LABEL: @ashr_i64_2 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { +entry: + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = ashr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK-LABEL: @ashr_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*?
*}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK-LABEL: @ashr_v2i64 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = ashr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +;EG-CHECK-LABEL: @ashr_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{\*? 
*}}[[COMPSHC]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? 
*T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK-LABEL: @ashr_v4i64 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = ashr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll index 76373552fb16..44ad73f073ed 100644 --- a/test/CodeGen/R600/srl.ll +++ b/test/CodeGen/R600/srl.ll @@ -39,3 +39,129 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +;EG-CHECK: @lshr_i64 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: LSHR {{\*?
*}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 + +;SI-CHECK: @lshr_i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = lshr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK: @lshr_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? 
*T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @lshr_v2i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = lshr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + + +;EG-CHECK: @lshr_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? 
*}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @lshr_v4i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = lshr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll new file mode 100644 index 000000000000..6543f6d05933 --- /dev/null +++ b/test/CodeGen/R600/vector-alloca.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; FUNC-LABEL: @vector_read +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOVA_INT +define void @vector_read(i32 addrspace(1)* %out, i32 
%index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 1, i32* %y + store i32 2, i32* %z + store i32 3, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 %index + %2 = load i32* %1 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @vector_write +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOVA_INT +; EG: MOVA_INT +define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 0, i32* %y + store i32 0, i32* %z + store i32 0, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 %w_index + store i32 1, i32* %1 + %2 = getelementptr [4 x i32]* %0, i32 0, i32 %r_index + %3 = load i32* %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; This test should be optimized to: +; store i32 0, i32 addrspace(1)* %out +; FUNC-LABEL: @bitcast_gep +; CHECK: STORE_RAW +define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 0, i32* %y + store i32 0, i32* %z + store i32 0, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 1 + %2 = bitcast i32* %1 to [4 x i32]* + %3 = getelementptr [4 x i32]* %2, i32 0, i32 0 + %4 = load i32* %3 + store i32 %4, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index
2476ea1253e6..f5cda96b99fa 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -192,6 +192,14 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind { ret <16 x double> %b } +; CHECK-LABEL: uitof64_256 +; CHECK: vcvtudq2pd +; CHECK: ret +define <4 x double> @uitof64_256(<4 x i32> %a) nounwind { + %b = uitofp <4 x i32> %a to <4 x double> + ret <4 x double> %b +} + ; CHECK-LABEL: uitof32 ; CHECK: vcvtudq2ps ; CHECK: ret diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll new file mode 100644 index 000000000000..ef50cdb82831 --- /dev/null +++ b/test/CodeGen/X86/avx512-nontemporal.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s + +define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) { +; CHECK: vmovntps %z + %cast = bitcast i8* %B to <16 x float>* + %A2 = fadd <16 x float> %A, %AA + store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0 +; CHECK: vmovntdq %z + %cast1 = bitcast i8* %B to <8 x i64>* + %E2 = add <8 x i64> %E, %EE + store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0 +; CHECK: vmovntpd %z + %cast2 = bitcast i8* %B to <8 x double>* + %C2 = fadd <8 x double> %C, %CC + store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0 + ret void +} + +!0 = metadata !{i32 1} diff --git a/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/test/CodeGen/X86/fast-isel-cmp-branch3.ll new file mode 100644 index 000000000000..a3f6851ca240 --- /dev/null +++ b/test/CodeGen/X86/fast-isel-cmp-branch3.ll @@ -0,0 +1,470 @@ +; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s + +define i32 @fcmp_oeq1(float %x) { +; CHECK-LABEL: fcmp_oeq1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp oeq float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 
@fcmp_oeq2(float %x) { +; CHECK-LABEL: fcmp_oeq2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_2}} + %1 = fcmp oeq float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ogt1(float %x) { +; CHECK-LABEL: fcmp_ogt1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp ogt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ogt2(float %x) { +; CHECK-LABEL: fcmp_ogt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jbe {{LBB.+_1}} + %1 = fcmp ogt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oge1(float %x) { +; CHECK-LABEL: fcmp_oge1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp oge float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_oge2(float %x) { +; CHECK-LABEL: fcmp_oge2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jb {{LBB.+_1}} + %1 = fcmp oge float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_olt1(float %x) { +; CHECK-LABEL: fcmp_olt1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp olt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_olt2(float %x) { +; CHECK-LABEL: fcmp_olt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jbe {{LBB.+_1}} + %1 = fcmp olt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ole1(float %x) { +; CHECK-LABEL: fcmp_ole1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ole float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 
@fcmp_ole2(float %x) { +; CHECK-LABEL: fcmp_ole2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jb {{LBB.+_1}} + %1 = fcmp ole float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_one1(float %x) { +; CHECK-LABEL: fcmp_one1 +; CHECK-NOT: ucomiss +; CHECK: movl $1, %eax + %1 = fcmp one float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_one2(float %x) { +; CHECK-LABEL: fcmp_one2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je {{LBB.+_1}} + %1 = fcmp one float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ord1(float %x) { +; CHECK-LABEL: fcmp_ord1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ord float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ord2(float %x) { +; CHECK-LABEL: fcmp_ord2 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_1}} + %1 = fcmp ord float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uno1(float %x) { +; CHECK-LABEL: fcmp_uno1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_2}} + %1 = fcmp uno float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uno2(float %x) { +; CHECK-LABEL: fcmp_uno2 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jp {{LBB.+_2}} + %1 = fcmp uno float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ueq1(float %x) { +; CHECK-LABEL: fcmp_ueq1 +; CHECK-NOT: ucomiss + %1 = fcmp ueq float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ueq2(float %x) { +; CHECK-LABEL: fcmp_ueq2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: 
je {{LBB.+_2}} + %1 = fcmp ueq float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ugt1(float %x) { +; CHECK-LABEL: fcmp_ugt1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp ugt float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ugt2(float %x) { +; CHECK-LABEL: fcmp_ugt2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jae {{LBB.+_1}} + %1 = fcmp ugt float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uge1(float %x) { +; CHECK-LABEL: fcmp_uge1 +; CHECK-NOT: ucomiss + %1 = fcmp uge float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_uge2(float %x) { +; CHECK-LABEL: fcmp_uge2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: ja {{LBB.+_1}} + %1 = fcmp uge float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ult1(float %x) { +; CHECK-LABEL: fcmp_ult1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp ult float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ult2(float %x) { +; CHECK-LABEL: fcmp_ult2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jae {{LBB.+_1}} + %1 = fcmp ult float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ule1(float %x) { +; CHECK-LABEL: fcmp_ule1 +; CHECK-NOT: ucomiss + %1 = fcmp ule float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_ule2(float %x) { +; CHECK-LABEL: fcmp_ule2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: ja {{LBB.+_1}} + %1 = fcmp ule float %x, 0.000000e+00 + br i1 %1, label %bb1, 
label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_une1(float %x) { +; CHECK-LABEL: fcmp_une1 +; CHECK: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: jnp {{LBB.+_1}} + %1 = fcmp une float %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @fcmp_une2(float %x) { +; CHECK-LABEL: fcmp_une2 +; CHECK: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_2}} +; CHECK-NEXT: jmp {{LBB.+_1}} + %1 = fcmp une float %x, 0.000000e+00 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq(i32 %x) { +; CHECK-LABEL: icmp_eq +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp eq i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ne(i32 %x) { +; CHECK-LABEL: icmp_ne +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ne i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ugt(i32 %x) { +; CHECK-LABEL: icmp_ugt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ugt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_uge(i32 %x) { +; CHECK-LABEL: icmp_uge +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp uge i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ult(i32 %x) { +; CHECK-LABEL: icmp_ult +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp ult i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_ule(i32 %x) { +; CHECK-LABEL: icmp_ule +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp ule i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sgt(i32 %x) { +; CHECK-LABEL: icmp_sgt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp sgt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret 
i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sge(i32 %x) { +; CHECK-LABEL: icmp_sge +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp sge i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_slt(i32 %x) { +; CHECK-LABEL: icmp_slt +; CHECK-NOT: cmpl +; CHECK: movl $1, %eax + %1 = icmp slt i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_sle(i32 %x) { +; CHECK-LABEL: icmp_sle +; CHECK-NOT: cmpl +; CHECK: movl $0, %eax + %1 = icmp sle i32 %x, %x + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + diff --git a/test/CodeGen/X86/fast-isel-cmp.ll b/test/CodeGen/X86/fast-isel-cmp.ll index edf1263a3eac..1b72cfcde657 100644 --- a/test/CodeGen/X86/fast-isel-cmp.ll +++ b/test/CodeGen/X86/fast-isel-cmp.ll @@ -155,7 +155,7 @@ define zeroext i1 @fcmp_une(float %x, float %y) { ; FAST: ucomiss %xmm1, %xmm0 ; FAST-NEXT: setne %al ; FAST-NEXT: setp %cl -; FAST-NEXT: andb %al, %cl +; FAST-NEXT: orb %al, %cl %1 = fcmp une float %x, %y ret i1 %1 } @@ -270,3 +270,420 @@ define zeroext i1 @icmp_sle(i32 %x, i32 %y) { ret i1 %1 } +; Test cmp folding and condition optimization. 
+define zeroext i1 @fcmp_oeq2(float %x) { +; SDAG-LABEL: fcmp_oeq2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_oeq2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp oeq float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_oeq3(float %x) { +; SDAG-LABEL: fcmp_oeq3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: cmpeqss %xmm1, %xmm0 +; SDAG-NEXT: movd %xmm0, %eax +; SDAG-NEXT: andl $1, %eax +; FAST-LABEL: fcmp_oeq3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: sete %al +; FAST-NEXT: setnp %cl +; FAST-NEXT: andb %al, %cl + %1 = fcmp oeq float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ogt2(float %x) { +; SDAG-LABEL: fcmp_ogt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_ogt2 +; FAST: xorl %eax, %eax + %1 = fcmp ogt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ogt3(float %x) { +; SDAG-LABEL: fcmp_ogt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: seta %al +; FAST-LABEL: fcmp_ogt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: seta %al + %1 = fcmp ogt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_oge2(float %x) { +; SDAG-LABEL: fcmp_oge2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_oge2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp oge float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_oge3(float %x) { +; SDAG-LABEL: fcmp_oge3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setae %al +; FAST-LABEL: fcmp_oge3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setae %al + %1 = fcmp oge float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_olt2(float %x) { +; SDAG-LABEL: fcmp_olt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_olt2 +; FAST: xorl %eax, %eax + %1 = fcmp olt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_olt3(float %x) { +; SDAG-LABEL: fcmp_olt3 +; SDAG: xorps 
%xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: seta %al +; FAST-LABEL: fcmp_olt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: seta %al + %1 = fcmp olt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ole2(float %x) { +; SDAG-LABEL: fcmp_ole2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ole2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ole float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ole3(float %x) { +; SDAG-LABEL: fcmp_ole3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setae %al +; FAST-LABEL: fcmp_ole3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setae %al + %1 = fcmp ole float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_one2(float %x) { +; SDAG-LABEL: fcmp_one2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: fcmp_one2 +; FAST: xorl %eax, %eax + %1 = fcmp one float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_one3(float %x) { +; SDAG-LABEL: fcmp_one3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setne %al +; FAST-LABEL: fcmp_one3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setne %al + %1 = fcmp one float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ord2(float %x) { +; SDAG-LABEL: fcmp_ord2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ord2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ord float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ord3(float %x) { +; SDAG-LABEL: fcmp_ord3 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setnp %al +; FAST-LABEL: fcmp_ord3 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setnp %al + %1 = fcmp ord float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_uno2(float %x) { +; SDAG-LABEL: fcmp_uno2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_uno2 +; FAST: ucomiss %xmm0, %xmm0 +; 
FAST-NEXT: setp %al + %1 = fcmp uno float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_uno3(float %x) { +; SDAG-LABEL: fcmp_uno3 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_uno3 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp uno float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ueq2(float %x) { +; SDAG-LABEL: fcmp_ueq2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_ueq2 +; FAST: movb $1, %al + %1 = fcmp ueq float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ueq3(float %x) { +; SDAG-LABEL: fcmp_ueq3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: sete %al +; FAST-LABEL: fcmp_ueq3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: sete %al + %1 = fcmp ueq float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ugt2(float %x) { +; SDAG-LABEL: fcmp_ugt2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_ugt2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp ugt float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ugt3(float %x) { +; SDAG-LABEL: fcmp_ugt3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setb %al +; FAST-LABEL: fcmp_ugt3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setb %al + %1 = fcmp ugt float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_uge2(float %x) { +; SDAG-LABEL: fcmp_uge2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_uge2 +; FAST: movb $1, %al + %1 = fcmp uge float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_uge3(float %x) { +; SDAG-LABEL: fcmp_uge3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm0, %xmm1 +; SDAG-NEXT: setbe %al +; FAST-LABEL: fcmp_uge3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm0, %xmm1 +; FAST-NEXT: setbe %al + %1 = fcmp uge float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ult2(float %x) { +; SDAG-LABEL: fcmp_ult2 +; SDAG: ucomiss %xmm0, %xmm0 +; 
SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_ult2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp ult float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ult3(float %x) { +; SDAG-LABEL: fcmp_ult3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setb %al +; FAST-LABEL: fcmp_ult3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setb %al + %1 = fcmp ult float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_ule2(float %x) { +; SDAG-LABEL: fcmp_ule2 +; SDAG: movb $1, %al +; FAST-LABEL: fcmp_ule2 +; FAST: movb $1, %al + %1 = fcmp ule float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_ule3(float %x) { +; SDAG-LABEL: fcmp_ule3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: ucomiss %xmm1, %xmm0 +; SDAG-NEXT: setbe %al +; FAST-LABEL: fcmp_ule3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setbe %al + %1 = fcmp ule float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @fcmp_une2(float %x) { +; SDAG-LABEL: fcmp_une2 +; SDAG: ucomiss %xmm0, %xmm0 +; SDAG-NEXT: setp %al +; FAST-LABEL: fcmp_une2 +; FAST: ucomiss %xmm0, %xmm0 +; FAST-NEXT: setp %al + %1 = fcmp une float %x, %x + ret i1 %1 +} + +define zeroext i1 @fcmp_une3(float %x) { +; SDAG-LABEL: fcmp_une3 +; SDAG: xorps %xmm1, %xmm1 +; SDAG-NEXT: cmpneqss %xmm1, %xmm0 +; SDAG-NEXT: movd %xmm0, %eax +; SDAG-NEXT: andl $1, %eax +; FAST-LABEL: fcmp_une3 +; FAST: xorps %xmm1, %xmm1 +; FAST-NEXT: ucomiss %xmm1, %xmm0 +; FAST-NEXT: setne %al +; FAST-NEXT: setp %cl +; FAST-NEXT: orb %al, %cl + %1 = fcmp une float %x, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @icmp_eq2(i32 %x) { +; SDAG-LABEL: icmp_eq2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_eq2 +; FAST: movb $1, %al + %1 = icmp eq i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ne2(i32 %x) { +; SDAG-LABEL: icmp_ne2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ne2 +; FAST: xorl %eax, %eax + %1 = icmp ne i32 %x, %x + ret i1 %1 +} + +define zeroext i1 
@icmp_ugt2(i32 %x) { +; SDAG-LABEL: icmp_ugt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ugt2 +; FAST: xorl %eax, %eax + %1 = icmp ugt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_uge2(i32 %x) { +; SDAG-LABEL: icmp_uge2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_uge2 +; FAST: movb $1, %al + %1 = icmp uge i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ult2(i32 %x) { +; SDAG-LABEL: icmp_ult2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_ult2 +; FAST: xorl %eax, %eax + %1 = icmp ult i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_ule2(i32 %x) { +; SDAG-LABEL: icmp_ule2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_ule2 +; FAST: movb $1, %al + %1 = icmp ule i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sgt2(i32 %x) { +; SDAG-LABEL: icmp_sgt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_sgt2 +; FAST: xorl %eax, %eax + %1 = icmp sgt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sge2(i32 %x) { +; SDAG-LABEL: icmp_sge2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_sge2 +; FAST: movb $1, %al + %1 = icmp sge i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_slt2(i32 %x) { +; SDAG-LABEL: icmp_slt2 +; SDAG: xorl %eax, %eax +; FAST-LABEL: icmp_slt2 +; FAST: xorl %eax, %eax + %1 = icmp slt i32 %x, %x + ret i1 %1 +} + +define zeroext i1 @icmp_sle2(i32 %x) { +; SDAG-LABEL: icmp_sle2 +; SDAG: movb $1, %al +; FAST-LABEL: icmp_sle2 +; FAST: movb $1, %al + %1 = icmp sle i32 %x, %x + ret i1 %1 +} + diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll new file mode 100644 index 000000000000..954a9d994e61 --- /dev/null +++ b/test/CodeGen/X86/haddsub-undef.ll @@ -0,0 +1,325 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE +; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX +; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 + +; Verify that we correctly fold horizontal 
binop even in the presence of UNDEFs. + +define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext10 = extractelement <4 x float> %b, i32 2 + %vecext11 = extractelement <4 x float> %b, i32 3 + %add12 = fadd float %vecext10, %vecext11 + %vecinit13 = insertelement <4 x float> %vecinit5, float %add12, i32 3 + ret <4 x float> %vecinit13 +} +; CHECK-LABEL: test1_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext6 = extractelement <4 x float> %b, i32 0 + %vecext7 = extractelement <4 x float> %b, i32 1 + %add8 = fadd float %vecext6, %vecext7 + %vecinit9 = insertelement <4 x float> %vecinit, float %add8, i32 2 + %vecext10 = extractelement <4 x float> %b, i32 2 + %vecext11 = extractelement <4 x float> %b, i32 3 + %add12 = fadd float %vecext10, %vecext11 + %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3 + ret <4 x float> %vecinit13 +} +; CHECK-LABEL: test2_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> 
%a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext6 = extractelement <4 x float> %b, i32 0 + %vecext7 = extractelement <4 x float> %b, i32 1 + %add8 = fadd float %vecext6, %vecext7 + %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2 + ret <4 x float> %vecinit9 +} +; CHECK-LABEL: test3_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + ret <4 x float> %vecinit +} +; CHECK-LABEL: test4_undef +; CHECK-NOT: haddps +; CHECK: ret + + +define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { + %vecext = extractelement <2 x double> %a, i32 0 + %vecext1 = extractelement <2 x double> %a, i32 1 + %add = fadd double %vecext, %vecext1 + %vecinit = insertelement <2 x double> undef, double %add, i32 0 + ret <2 x double> %vecinit +} +; CHECK-LABEL: test5_undef +; CHECK-NOT: haddpd +; CHECK: ret + + +define <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test6_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %b, i32 0 + %vecext1 = extractelement 
<4 x float> %b, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 2 + %vecext2 = extractelement <4 x float> %b, i32 2 + %vecext3 = extractelement <4 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test7_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NEXT: ret + + +define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %a, i32 2 + %vecext3 = extractelement <4 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 2 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test8_undef +; CHECK-NOT: haddps +; CHECK: ret + + +define <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) { + %vecext = extractelement <4 x float> %a, i32 0 + %vecext1 = extractelement <4 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %b, i32 2 + %vecext3 = extractelement <4 x float> %b, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3 + ret <4 x float> %vecinit5 +} +; CHECK-LABEL: test9_undef +; CHECK: haddps +; CHECK-NEXT: ret + +define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> %b, i32 2 + %vecext3 = extractelement <8 x float> %b, i32 3 + %add4 = fadd 
float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 3 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test10_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> %b, i32 4 + %vecext3 = extractelement <8 x float> %b, i32 5 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 6 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test11_undef +; SSE-NOT: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK: ret + +define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add = fadd float %vecext, %vecext1 + %vecinit = insertelement <8 x float> undef, float %add, i32 0 + %vecext2 = extractelement <8 x float> %a, i32 2 + %vecext3 = extractelement <8 x float> %a, i32 3 + %add4 = fadd float %vecext2, %vecext3 + %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1 + ret <8 x float> %vecinit5 +} +; CHECK-LABEL: test12_undef +; SSE: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) { + %vecext = extractelement <8 x float> %a, i32 0 + %vecext1 = extractelement <8 x float> %a, i32 1 + %add1 = fadd float %vecext, %vecext1 + %vecinit1 = insertelement <8 x float> undef, float %add1, i32 0 + %vecext2 = extractelement <8 x float> %a, i32 2 + %vecext3 = extractelement <8 x float> %a, i32 3 + %add2 = fadd float %vecext2, %vecext3 + %vecinit2 = insertelement <8 x float> %vecinit1, float %add2, i32 1 + %vecext4 = extractelement <8 x float> %a, i32 4 
+ %vecext5 = extractelement <8 x float> %a, i32 5 + %add3 = fadd float %vecext4, %vecext5 + %vecinit3 = insertelement <8 x float> %vecinit2, float %add3, i32 2 + %vecext6 = extractelement <8 x float> %a, i32 6 + %vecext7 = extractelement <8 x float> %a, i32 7 + %add4 = fadd float %vecext6, %vecext7 + %vecinit4 = insertelement <8 x float> %vecinit3, float %add4, i32 3 + ret <8 x float> %vecinit4 +} +; CHECK-LABEL: test13_undef +; SSE: haddps +; SSE-NOT: haddps +; AVX: vhaddps +; AVX2: vhaddps +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %b, i32 2 + %vecext3 = extractelement <8 x i32> %b, i32 3 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 3 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test14_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: phaddd +; CHECK: ret + +; On AVX2, the following sequence can be folded into a single horizontal add. +; If the Subtarget doesn't support AVX2, then we avoid emitting two packed +; integer horizontal adds instead of two scalar adds followed by vector inserts. 
+define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %b, i32 4 + %vecext3 = extractelement <8 x i32> %b, i32 5 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 6 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test15_undef +; SSE-NOT: phaddd +; AVX-NOT: vphaddd +; AVX2: vphaddd +; CHECK: ret + +define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add = add i32 %vecext, %vecext1 + %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 + %vecext2 = extractelement <8 x i32> %a, i32 2 + %vecext3 = extractelement <8 x i32> %a, i32 3 + %add4 = add i32 %vecext2, %vecext3 + %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1 + ret <8 x i32> %vecinit5 +} +; CHECK-LABEL: test16_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: haddps +; CHECK: ret + +define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) { + %vecext = extractelement <8 x i32> %a, i32 0 + %vecext1 = extractelement <8 x i32> %a, i32 1 + %add1 = add i32 %vecext, %vecext1 + %vecinit1 = insertelement <8 x i32> undef, i32 %add1, i32 0 + %vecext2 = extractelement <8 x i32> %a, i32 2 + %vecext3 = extractelement <8 x i32> %a, i32 3 + %add2 = add i32 %vecext2, %vecext3 + %vecinit2 = insertelement <8 x i32> %vecinit1, i32 %add2, i32 1 + %vecext4 = extractelement <8 x i32> %a, i32 4 + %vecext5 = extractelement <8 x i32> %a, i32 5 + %add3 = add i32 %vecext4, %vecext5 + %vecinit3 = insertelement <8 x i32> %vecinit2, i32 %add3, i32 2 + %vecext6 = extractelement <8 x i32> %a, i32 6 + %vecext7 = extractelement <8 x i32> %a, i32 7 + %add4 = add i32 %vecext6, %vecext7 + %vecinit4 = insertelement <8 x i32> 
%vecinit3, i32 %add4, i32 3 + ret <8 x i32> %vecinit4 +} +; CHECK-LABEL: test17_undef +; SSE: phaddd +; AVX: vphaddd +; AVX2: vphaddd +; CHECK-NOT: haddps +; CHECK: ret + diff --git a/test/CodeGen/X86/libcall-sret.ll b/test/CodeGen/X86/libcall-sret.ll new file mode 100644 index 000000000000..67b99ac239cd --- /dev/null +++ b/test/CodeGen/X86/libcall-sret.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=i686-linux-gnu -o - %s | FileCheck %s + +@var = global i128 0 + +; We were trying to convert the i128 operation into a libcall, but failing to +; perform sret demotion when we couldn't return the result in registers. Make +; sure we marshal the return properly: + +define void @test_sret_libcall(i128 %l, i128 %r) { +; CHECK-LABEL: test_sret_libcall: + + ; Stack for call: 4(sret ptr), 16(i128 %l), 16(128 %r). So next logical + ; (aligned) place for the actual sret data is %esp + 40. +; CHECK: leal 40(%esp), [[SRET_ADDR:%[a-z]+]] +; CHECK: movl [[SRET_ADDR]], (%esp) +; CHECK: calll __multi3 +; CHECK-DAG: movl 40(%esp), [[RES0:%[a-z]+]] +; CHECK-DAG: movl 44(%esp), [[RES1:%[a-z]+]] +; CHECK-DAG: movl 48(%esp), [[RES2:%[a-z]+]] +; CHECK-DAG: movl 52(%esp), [[RES3:%[a-z]+]] +; CHECK-DAG: movl [[RES0]], var +; CHECK-DAG: movl [[RES1]], var+4 +; CHECK-DAG: movl [[RES2]], var+8 +; CHECK-DAG: movl [[RES3]], var+12 + %prod = mul i128 %l, %r + store i128 %prod, i128* @var + ret void +} diff --git a/test/CodeGen/X86/x86-64-static-relo-movl.ll b/test/CodeGen/X86/x86-64-static-relo-movl.ll index b184df461968..71e52bb99191 100644 --- a/test/CodeGen/X86/x86-64-static-relo-movl.ll +++ b/test/CodeGen/X86/x86-64-static-relo-movl.ll @@ -17,7 +17,7 @@ define void @setup() { done: ret void - ; CHECK: movl $_NO_MATCH, {{.*}} + ; CHECK: movabsq $_NO_MATCH, {{.*}} } ; Function Attrs: nounwind diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll index c78b8b8f628f..65907d679780 100644 --- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll +++ 
b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll @@ -1,5 +1,5 @@ ; RUN: llc %s -o /dev/null -; Here variable bar is optimzied away. Do not trip over while trying to generate debug info. +; Here variable bar is optimized away. Do not trip over while trying to generate debug info. define i32 @foo() nounwind uwtable readnone ssp { diff --git a/test/DebugInfo/AArch64/eh_frame.s b/test/DebugInfo/AArch64/eh_frame.s index d8d6b6d9325f..12a58961d717 100644 --- a/test/DebugInfo/AArch64/eh_frame.s +++ b/test/DebugInfo/AArch64/eh_frame.s @@ -17,7 +17,7 @@ foo: // Output is: // CHECK: Contents of section .eh_frame: -// CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01 .........zR..|.. +// CHECK-NEXT: 0000 10000000 00000000 037a5200 017c1e01 .........zR..|.. // CHECK-NEXT: 0010 1b0c1f00 10000000 18000000 00000000 ................ @@ -30,7 +30,7 @@ foo: // ------------------- // 10000000: length of first CIE = 0x10 // 00000000: This is a CIE -// 01: version = 0x1 +// 03: version = 0x3 // 7a 52 00: augmentation string "zR" -- pointer format is specified // 01: code alignment factor 1 // 7c: data alignment factor -4 diff --git a/test/DebugInfo/AArch64/eh_frame_personality.ll b/test/DebugInfo/AArch64/eh_frame_personality.ll index d35f2a2fcafb..51d6bf80b950 100644 --- a/test/DebugInfo/AArch64/eh_frame_personality.ll +++ b/test/DebugInfo/AArch64/eh_frame_personality.ll @@ -16,7 +16,7 @@ clean: } ; CHECK: Contents of section .eh_frame: -; CHECK: 0000 1c000000 00000000 017a504c 5200017c .........zPLR..| +; CHECK: 0000 1c000000 00000000 037a504c 5200017c .........zPLR..| ; CHECK: 0010 1e0b0000 00000000 00000000 1b0c1f00 ................ 
; Don't really care about the rest: @@ -33,7 +33,7 @@ clean: ; ---------- ; 1c000000: Length = 0x1c ; 00000000: This is a CIE -; 01: Version 1 +; 03: Version 3 ; 7a 50 4c 52 00: Augmentation string "zPLR" (personality routine, language-specific data, pointer format) ; 01: Code alignment factor 1 ; 78: Data alignment factor: -8 diff --git a/test/DebugInfo/SystemZ/eh_frame.s b/test/DebugInfo/SystemZ/eh_frame.s index 4e7afd56e94b..d55b6cdea8c3 100644 --- a/test/DebugInfo/SystemZ/eh_frame.s +++ b/test/DebugInfo/SystemZ/eh_frame.s @@ -23,7 +23,7 @@ check_largest_class: # Contents of the .eh_frame section: # # 00000000 0000001c 00000000 CIE -# Version: 1 +# Version: 3 # Augmentation: "zR" # Code alignment factor: 1 # Data alignment factor: -8 @@ -48,7 +48,7 @@ check_largest_class: # DW_CFA_nop # # CHECK: Contents of section .eh_frame: -# CHECK-NEXT: 0000 00000014 00000000 017a5200 01780e01 .........zR..x.. +# CHECK-NEXT: 0000 00000014 00000000 037a5200 01780e01 .........zR..x.. # CHECK-NEXT: 0010 1b0c0fa0 01000000 0000001c 0000001c ................ # CHECK-NEXT: 0020 00000000 00000012 00468d07 8e068f05 .........F...... # CHECK-NEXT: 0030 440ec002 00000000 D....... diff --git a/test/DebugInfo/SystemZ/eh_frame_personality.s b/test/DebugInfo/SystemZ/eh_frame_personality.s index 46b46db1d806..456e0a6e6bdd 100644 --- a/test/DebugInfo/SystemZ/eh_frame_personality.s +++ b/test/DebugInfo/SystemZ/eh_frame_personality.s @@ -37,7 +37,7 @@ DW.ref.__gxx_personality_v0: # Contents of the .eh_frame section: # # 00000000 0000001c 00000000 CIE -# Version: 1 +# Version: 3 # Augmentation: "zPLR" # Code alignment factor: 1 # Data alignment factor: -8 @@ -61,7 +61,7 @@ DW.ref.__gxx_personality_v0: # DW_CFA_nop # # CHECK: Contents of section .eh_frame: -# CHECK-NEXT: 0000 0000001c 00000000 017a504c 52000178 .........zPLR..x +# CHECK-NEXT: 0000 0000001c 00000000 037a504c 52000178 .........zPLR..x # CHECK-NEXT: 0010 0e079b00 0000001b 1b0c0fa0 01000000 ................ 
# CHECK-NEXT: 0020 0000001c 00000024 00000000 00000012 .......$........ # CHECK-NEXT: 0030 04000000 00468e06 8f05440e c0020000 .....F....D..... diff --git a/test/DebugInfo/X86/debug-loc-asan.ll b/test/DebugInfo/X86/debug-loc-asan.ll index 0e02c6787175..746b1dcaa3eb 100644 --- a/test/DebugInfo/X86/debug-loc-asan.ll +++ b/test/DebugInfo/X86/debug-loc-asan.ll @@ -9,11 +9,11 @@ ; } ; with "clang++ -S -emit-llvm -fsanitize=address -O0 -g test.cc" -; First, argument variable "y" resides in %rdx: -; CHECK: DEBUG_VALUE: bar:y <- RDX +; First, argument variable "y" resides in %rdi: +; CHECK: DEBUG_VALUE: bar:y <- RDI ; Then its address is stored in a location on a stack: -; CHECK: movq %rdx, [[OFFSET:[0-9]+]](%rsp) +; CHECK: movq %rdi, [[OFFSET:[0-9]+]](%rsp) ; CHECK-NEXT: [[START_LABEL:.Ltmp[0-9]+]] ; CHECK-NEXT: DEBUG_VALUE: bar:y <- [RSP+[[OFFSET]]] ; This location should be valid until the end of the function. @@ -26,7 +26,7 @@ ; CHECK-NEXT: .quad .Lset{{[0-9]+}} ; CHECK-NEXT: .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0 ; CHECK-NEXT: .quad .Lset{{[0-9]+}} -; CHECK: DW_OP_reg1 +; CHECK: DW_OP_reg5 ; Then it's addressed via %rsp: ; CHECK: .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0 diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll index 057039c3c5cc..135837582fcc 100644 --- a/test/DebugInfo/X86/fission-ranges.ll +++ b/test/DebugInfo/X86/fission-ranges.ll @@ -44,6 +44,13 @@ ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo) ; HDR-NOT: .rela.{{.*}}.dwo +; Make sure we have enough stuff in the debug_addr to cover the address indexes +; (6 is the last index in debug_loc.dwo, making 7 entries of 8 bytes each, 7 * 8 +; == 56 base 10 == 38 base 16) + +; HDR: .debug_addr 00000038 +; HDR-NOT: .rela.{{.*}}.dwo + ; From the code: ; extern int c; diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll index c515114fd50b..3c97f0cb2279 100644 --- a/test/DebugInfo/global.ll +++ 
b/test/DebugInfo/global.ll @@ -3,6 +3,9 @@ ; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t ; RUN: llvm-dwarfdump %t | FileCheck %s +; Also test that the null streamer doesn't crash with debug info. +; RUN: %llc_dwarf -O0 -filetype=null < %s + ; generated from the following source compiled to bitcode with clang -g -O1 ; static int i; ; int main() { diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg index 9ec54a583f86..f6673df3c358 100644 --- a/test/ExecutionEngine/lit.local.cfg +++ b/test/ExecutionEngine/lit.local.cfg @@ -2,6 +2,9 @@ if config.root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']: config.unsupported = True # CMake and autoconf diverge in naming or host_arch +if 'powerpc64' in config.root.target_triple: + config.unsupported = True + if 'aarch64' in config.root.target_triple \ or 'arm64' in config.root.target_triple: config.unsupported = True diff --git a/test/Instrumentation/MemorySanitizer/mul_by_constant.ll b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll new file mode 100644 index 000000000000..e068f69ae4ba --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll @@ -0,0 +1,94 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check instrumentation mul when one of the operands is a constant. 
+ +define i64 @MulConst(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, 42949672960000 + ret i64 %y +} + +; 42949672960000 = 2**32 * 10000 +; 36 trailing zero bits +; 68719476736 = 2**36 + +; CHECK-LABEL: @MulConst( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 68719476736 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulZero(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, 0 + ret i64 %y +} + +; CHECK-LABEL: @MulZero( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 0{{$}} +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulNeg(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, -16 + ret i64 %y +} + +; CHECK-LABEL: @MulNeg( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 16 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulNeg2(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, -48 + ret i64 %y +} + +; CHECK-LABEL: @MulNeg2( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 16 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulOdd(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, 12345 + ret i64 %y +} + +; CHECK-LABEL: @MulOdd( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], 1 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + + +define i64 @MulLarge(i64 %x) sanitize_memory { +entry: + %y = mul i64 %x, -9223372036854775808 + ret i64 %y +} + +; -9223372036854775808 = 0x8000000000000000 + +; CHECK-LABEL: @MulLarge( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul i64 [[A]], -9223372036854775808 +; CHECK: store i64 [[B]], i64* {{.*}} @__msan_retval_tls + +define <4 x i32> @MulVectorConst(<4 x i32> %x) sanitize_memory { +entry: + %y = mul <4 x i32> %x, + ret <4 x i32> %y +} + +; CHECK-LABEL:
@MulVectorConst( +; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls +; CHECK: [[B:%.*]] = mul <4 x i32> [[A]], +; CHECK: store <4 x i32> [[B]], <4 x i32>* {{.*}} @__msan_retval_tls diff --git a/test/Instrumentation/MemorySanitizer/vector_arith.ll b/test/Instrumentation/MemorySanitizer/vector_arith.ll new file mode 100644 index 000000000000..6541a1c3a394 --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone +declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone +declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone + +define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory { +entry: + %c = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b) nounwind + ret <4 x i32> %c +} + +; CHECK-LABEL: @Test_sse2_pmadd_wd( +; CHECK: or <8 x i16> +; CHECK: bitcast <8 x i16> {{.*}} to <4 x i32> +; CHECK: icmp ne <4 x i32> {{.*}}, zeroinitializer +; CHECK: sext <4 x i1> {{.*}} to <4 x i32> +; CHECK: ret <4 x i32> + + +define x86_mmx @Test_ssse3_pmadd_ub_sw(x86_mmx %a, x86_mmx %b) sanitize_memory { +entry: + %c = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind + ret x86_mmx %c +} + +; CHECK-LABEL: @Test_ssse3_pmadd_ub_sw( +; CHECK: or i64 +; CHECK: bitcast i64 {{.*}} to <4 x i16> +; CHECK: icmp ne <4 x i16> {{.*}}, zeroinitializer +; CHECK: sext <4 x i1> {{.*}} to <4 x i16> +; CHECK: bitcast <4 x i16> {{.*}} to i64 +; CHECK: ret x86_mmx + + +define <2 x i64> @Test_x86_sse2_psad_bw(<16 
x i8> %a, <16 x i8> %b) sanitize_memory { + %c = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a, <16 x i8> %b) + ret <2 x i64> %c +} + +; CHECK-LABEL: @Test_x86_sse2_psad_bw( +; CHECK: or <16 x i8> {{.*}}, {{.*}} +; CHECK: bitcast <16 x i8> {{.*}} to <2 x i64> +; CHECK: icmp ne <2 x i64> {{.*}}, zeroinitializer +; CHECK: sext <2 x i1> {{.*}} to <2 x i64> +; CHECK: lshr <2 x i64> {{.*}}, +; CHECK: ret <2 x i64> + + +define x86_mmx @Test_x86_mmx_psad_bw(x86_mmx %a, x86_mmx %b) sanitize_memory { +entry: + %c = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind + ret x86_mmx %c +} + +; CHECK-LABEL: @Test_x86_mmx_psad_bw( +; CHECK: or i64 +; CHECK: icmp ne i64 +; CHECK: sext i1 {{.*}} to i64 +; CHECK: lshr i64 {{.*}}, 48 +; CHECK: ret x86_mmx diff --git a/test/LTO/jump-table-type.ll b/test/LTO/jump-table-type.ll new file mode 100644 index 000000000000..a39d3e959830 --- /dev/null +++ b/test/LTO/jump-table-type.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as <%s >%t1 +; RUN: llvm-lto -o %t2 %t1 -jump-table-type=arity +; RUN: llvm-nm %t2 | FileCheck %s + +; CHECK: T __llvm_jump_instr_table_0_1 +; CHECK: T __llvm_jump_instr_table_1_1 + +target triple = "x86_64-unknown-linux-gnu" + +define i32 @g(i32 %a) unnamed_addr jumptable { + ret i32 %a +} + +define i32 @f() unnamed_addr jumptable { + ret i32 0 +} + +define i32 @main() { + ret i32 0 +} + +@llvm.used = appending global [2 x i8*] [i8* bitcast (i32(i32)* @g to i8*), + i8* bitcast (i32()* @f to i8*)] diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s new file mode 100644 index 000000000000..ed1b89eff3cd --- /dev/null +++ b/test/MC/ARM/dwarf-asm-multiple-sections.s @@ -0,0 +1,79 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi 
-filetype=obj -o %t -g -dwarf-version 2 2>&1 | FileCheck -check-prefix VERSION %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 1 2>&1 | FileCheck -check-prefix DWARF1 %s +// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 5 2>&1 | FileCheck -check-prefix DWARF5 %s + .section .text, "ax" +a: + mov r0, r0 + + .section foo, "ax" +b: + mov r1, r1 + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_ranges DW_FORM_data4 +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// CHECK-NOT-DWARF: DW_TAG_ +// DWARF: DW_AT_ranges [DW_FORM_data4] (0x00000000) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a") + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("b") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x00000024, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) +// DWARF-NEXT: [0x00000000 - 0x00000004) + + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 9 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 9 0 1 0 0 is_stmt end_sequence +// DWARF-NEXT: 0x0000000000000000 13 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 13 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF: 00000000 ffffffff 00000000 +// DWARF: 00000000 00000000 00000004 +// DWARF: 00000000 ffffffff 00000000 +// DWARF: 00000000 00000000 00000004 +// DWARF: 00000000 + + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 
R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: 00000010 R_ARM_ABS32 .debug_ranges +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 foo + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_ranges]: +// RELOC-NEXT: 00000004 R_ARM_ABS32 .text +// RELOC-NEXT: 00000014 R_ARM_ABS32 foo + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 .text +// RELOC-NEXT: 00000018 R_ARM_ABS32 foo + + +// VERSION: {{.*}} error: DWARF2 only supports one section per compilation unit + +// DWARF1: Dwarf version 1 is not supported. +// DWARF5: Dwarf version 5 is not supported. diff --git a/test/MC/ARM/dwarf-asm-no-code.s b/test/MC/ARM/dwarf-asm-no-code.s new file mode 100644 index 000000000000..7d06a4190091 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-no-code.s @@ -0,0 +1,27 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + +// If there is no code in an assembly file, no debug info is produced + +.section .data, "aw" +a: +.long 42 + +// DWARF: .debug_abbrev contents: +// DWARF-NEXT: < EMPTY > + +// DWARF: .debug_info contents: + +// DWARF: .debug_aranges contents: + +// DWARF: .debug_line contents: + +// DWARF: .debug_ranges contents: + + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_info]: + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_aranges]: diff --git a/test/MC/ARM/dwarf-asm-nonstandard-section.s b/test/MC/ARM/dwarf-asm-nonstandard-section.s new file mode 100644 index 000000000000..497a39ad1162 --- /dev/null +++ b/test/MC/ARM/dwarf-asm-nonstandard-section.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump 
%t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + + .section foo, "ax" +b: + mov r1, r1 + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_low_pc DW_FORM_addr +// DWARF: DW_AT_high_pc DW_FORM_addr +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// DWARF-NOT: DW_TAG_ +// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) +// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("b") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) + + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 7 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 7 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF-NOT: {{0-9a-f}} +// DWARF: .debug_pubnames contents: + + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo +// RELOC-NEXT: R_ARM_ABS32 foo + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 R_ARM_ABS32 foo diff --git a/test/MC/ARM/dwarf-asm-single-section.s b/test/MC/ARM/dwarf-asm-single-section.s new file mode 100644 index 000000000000..c57e6498a38a --- /dev/null +++ 
b/test/MC/ARM/dwarf-asm-single-section.s @@ -0,0 +1,56 @@ +// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp +// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s +// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s + + .section .text, "ax" +a: + mov r0, r0 + + +// DWARF: .debug_abbrev contents: +// DWARF: Abbrev table for offset: 0x00000000 +// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes +// DWARF: DW_AT_stmt_list DW_FORM_data4 +// DWARF: DW_AT_low_pc DW_FORM_addr +// DWARF: DW_AT_high_pc DW_FORM_addr +// DWARF: DW_AT_name DW_FORM_string +// DWARF: DW_AT_comp_dir DW_FORM_string +// DWARF: DW_AT_producer DW_FORM_string +// DWARF: DW_AT_language DW_FORM_data2 + +// DWARF: .debug_info contents: +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1] +// CHECK-NOT-DWARF: DW_TAG_ +// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) +// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004) + +// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] * +// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a") + + +// DWARF: .debug_aranges contents: +// DWARF-NEXT: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00 +// DWARF-NEXT: [0x00000000 - 0x00000004) + +// DWARF: .debug_line contents: +// DWARF: 0x0000000000000000 7 0 1 0 0 is_stmt +// DWARF-NEXT: 0x0000000000000004 7 0 1 0 0 is_stmt end_sequence + + +// DWARF: .debug_ranges contents: +// DWARF-NOT: {{0-9a-f}} +// DWARF: .debug_pubnames contents: + + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev +// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text +// RELOC-NEXT: R_ARM_ABS32 .text + +// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]: + +// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]: +// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info +// RELOC-NEXT: 00000010 
R_ARM_ABS32 .text diff --git a/test/MC/AsmParser/conditional_asm.s b/test/MC/AsmParser/conditional_asm.s index b9bee33c6a16..ecbceb1dc362 100644 --- a/test/MC/AsmParser/conditional_asm.s +++ b/test/MC/AsmParser/conditional_asm.s @@ -11,6 +11,66 @@ .endif .endif +# CHECK: .byte 0 +# CHECK-NOT: .byte 1 +.ifeq 32 - 32 + .byte 0 +.else + .byte 1 +.endif + +# CHECK: .byte 0 +# CHECK: .byte 1 +# CHECK-NOT: .byte 2 +.ifge 32 - 31 + .byte 0 +.endif +.ifge 32 - 32 + .byte 1 +.endif +.ifge 32 - 33 + .byte 2 +.endif + +# CHECK: .byte 0 +# CHECK-NOT: .byte 1 +# CHECK-NOT: .byte 2 +.ifgt 32 - 31 + .byte 0 +.endif +.ifgt 32 - 32 + .byte 1 +.endif +.ifgt 32 - 33 + .byte 2 +.endif + +# CHECK-NOT: .byte 0 +# CHECK: .byte 1 +# CHECK: .byte 2 +.ifle 32 - 31 + .byte 0 +.endif +.ifle 32 - 32 + .byte 1 +.endif +.ifle 32 - 33 + .byte 2 +.endif + +# CHECK-NOT: .byte 0 +# CHECK-NOT: .byte 1 +# CHECK: .byte 2 +.iflt 32 - 31 + .byte 0 +.endif +.iflt 32 - 32 + .byte 1 +.endif +.iflt 32 - 33 + .byte 2 +.endif + # CHECK: .byte 1 # CHECK-NOT: .byte 0 .ifne 32 - 32 diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s index 9b99e0f24e99..d7290ebe1dbe 100644 --- a/test/MC/AsmParser/directive_file.s +++ b/test/MC/AsmParser/directive_file.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null .file "hello" .file 1 "worl\144" # "\144" is "d" diff --git a/test/MC/AsmParser/directive_line.s b/test/MC/AsmParser/directive_line.s index 94ce44602998..110b68a46216 100644 --- a/test/MC/AsmParser/directive_line.s +++ b/test/MC/AsmParser/directive_line.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null # FIXME: Actually test the output. 
.line diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s index cda9579fb272..404ebcecdd0a 100644 --- a/test/MC/AsmParser/directive_loc.s +++ b/test/MC/AsmParser/directive_loc.s @@ -1,4 +1,5 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: llvm-mc -triple i386-unknown-unknown %s -filetype=null .file 1 "hello" # CHECK: .file 1 "hello" diff --git a/test/MC/AsmParser/if-diagnostics.s b/test/MC/AsmParser/if-diagnostics.s new file mode 100644 index 000000000000..d102a5686d98 --- /dev/null +++ b/test/MC/AsmParser/if-diagnostics.s @@ -0,0 +1,29 @@ +// RUN: not llvm-mc -triple i386 %s -o /dev/null 2>&1 | FileCheck %s + +.if +.endif + +// CHECK: error: unknown token in expression +// CHECK: .if +// CHECK: ^ + +.ifeq 0, 3 +.endif + +// CHECK:error: unexpected token in '.if' directive +// CHECK: .ifeq 0, 3 +// CHECK: ^ + +.iflt "string1" +.endif + +// CHECK: error: expected absolute expression +// CHECK: .iflt "string1" +// CHECK: ^ + +.ifge test +.endif + +// CHECK: error: expected absolute expression +// CHECK: .ifge test +// CHECK: ^ diff --git a/test/MC/AsmParser/vararg.s b/test/MC/AsmParser/vararg.s index b27668ea3376..e3236b072d12 100644 --- a/test/MC/AsmParser/vararg.s +++ b/test/MC/AsmParser/vararg.s @@ -17,6 +17,12 @@ .endif .endm +.macro ifcc4 arg0, arg1:vararg +.if cc + movl \arg1, \arg0 +.endif +.endm + .text // CHECK: movl %esp, %ebp @@ -25,6 +31,8 @@ // CHECK: movl %ecx, %ebx // CHECK: movl %ecx, %eax // CHECK: movl %eax, %ecx +// CHECK: movl %ecx, %eax +// CHECK: movl %eax, %ecx .set cc,1 ifcc movl %esp, %ebp subl $0, %esp @@ -33,6 +41,8 @@ ifcc2 %ecx, %ebx ifcc3 %ecx %eax ifcc3 %eax, %ecx + ifcc4 %eax %ecx ## test + ifcc4 %ecx, %eax ## test // CHECK-NOT movl // CHECK: subl $1, %esp diff --git a/test/MC/Disassembler/Mips/mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6.txt index f64bcfd2a12f..35ddd56b0afa 100644 --- a/test/MC/Disassembler/Mips/mips32r6.txt +++ b/test/MC/Disassembler/Mips/mips32r6.txt @@ 
-42,6 +42,8 @@ 0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256 0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2 0x18 0x02 0x01 0x4d # CHECK: blezalc $2, +0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256 +0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256 0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4 0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4 0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4 diff --git a/test/MC/Disassembler/Mips/mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6.txt index a2ef0d6d8f22..951cc5373c0e 100644 --- a/test/MC/Disassembler/Mips/mips64r6.txt +++ b/test/MC/Disassembler/Mips/mips64r6.txt @@ -42,6 +42,8 @@ 0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256 0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2 0x18 0x02 0x01 0x4d # CHECK: blezalc $2, +0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256 +0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256 0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4 0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4 0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4 diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s index b3768cb9834c..9d639f70d8dd 100644 --- a/test/MC/ELF/cfi-adjust-cfa-offset.s +++ b/test/MC/ELF/cfi-adjust-cfa-offset.s @@ -28,7 +28,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 1C000000 1C000000 // CHECK-NEXT: 0020: 00000000 0A000000 00440E10 410E1444 // CHECK-NEXT: 0030: 0E080000 00000000 diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s index d7a53c462b70..98caa0185f59 100644 --- a/test/MC/ELF/cfi-advance-loc2.s +++ b/test/MC/ELF/cfi-advance-loc2.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 
90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01010000 00030001 0E080000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s index eac2c731fa93..59f740055d47 100644 --- a/test/MC/ELF/cfi-def-cfa-offset.s +++ b/test/MC/ELF/cfi-def-cfa-offset.s @@ -27,7 +27,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 0A000000 00440E10 450E0800 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s index 00d8b99af9d6..178ba32882dc 100644 --- a/test/MC/ELF/cfi-def-cfa-register.s +++ b/test/MC/ELF/cfi-def-cfa-register.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410D06 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s index 36e147f5a4da..dfb0d4b59396 100644 --- a/test/MC/ELF/cfi-def-cfa.s +++ b/test/MC/ELF/cfi-def-cfa.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410C07 08000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s index 839d6717debc..5394ee414aa7 100644 --- a/test/MC/ELF/cfi-escape.s +++ b/test/MC/ELF/cfi-escape.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 
14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00411507 7F000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s index 951a6001e519..a65b4fc783c7 100644 --- a/test/MC/ELF/cfi-offset.s +++ b/test/MC/ELF/cfi-offset.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00418602 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s index 4abbb53b8fc9..94417702c13c 100644 --- a/test/MC/ELF/cfi-register.s +++ b/test/MC/ELF/cfi-register.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410906 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s index 34254c862a46..0dc69c89cf4c 100644 --- a/test/MC/ELF/cfi-rel-offset.s +++ b/test/MC/ELF/cfi-rel-offset.s @@ -31,7 +31,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 24000000 1C000000 // CHECK-NEXT: 0020: 00000000 05000000 00410E08 410D0641 // CHECK-NEXT: 0030: 11067F41 0E104186 02000000 00000000 diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s index 3de769f39fa0..360e7b0ea0f5 100644 --- a/test/MC/ELF/cfi-rel-offset2.s +++ 
b/test/MC/ELF/cfi-rel-offset2.s @@ -23,7 +23,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01000000 00411106 7F000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s index 98c759d4fffc..3a38948b6a3e 100644 --- a/test/MC/ELF/cfi-remember.s +++ b/test/MC/ELF/cfi-remember.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 03000000 00410A41 0B000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s index d25b5ff2e93f..e225797f54d6 100644 --- a/test/MC/ELF/cfi-restore.s +++ b/test/MC/ELF/cfi-restore.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 0041C600 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s index 9f5ae4be9ed4..2d37f4d0b43e 100644 --- a/test/MC/ELF/cfi-same-value.s +++ b/test/MC/ELF/cfi-same-value.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410806 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-sections.s 
b/test/MC/ELF/cfi-sections.s index 15a79e5c055e..b0ba543e5bdb 100644 --- a/test/MC/ELF/cfi-sections.s +++ b/test/MC/ELF/cfi-sections.s @@ -26,7 +26,7 @@ f2: // ELF_64-NEXT: AddressAlignment: 8 // ELF_64-NEXT: EntrySize: 0 // ELF_64-NEXT: SectionData ( -// ELF_64-NEXT: 0000: 14000000 FFFFFFFF 01000178 100C0708 +// ELF_64-NEXT: 0000: 14000000 FFFFFFFF 03000178 100C0708 // ELF_64-NEXT: 0010: 90010000 00000000 14000000 00000000 // ELF_64-NEXT: 0020: 00000000 00000000 01000000 00000000 // ELF_64-NEXT: 0030: 14000000 00000000 00000000 00000000 @@ -47,7 +47,7 @@ f2: // ELF_32-NEXT: AddressAlignment: 4 // ELF_32-NEXT: EntrySize: 0 // ELF_32-NEXT: SectionData ( -// ELF_32-NEXT: 0000: 10000000 FFFFFFFF 0100017C 080C0404 +// ELF_32-NEXT: 0000: 10000000 FFFFFFFF 0300017C 080C0404 // ELF_32-NEXT: 0010: 88010000 0C000000 00000000 00000000 // ELF_32-NEXT: 0020: 01000000 0C000000 00000000 01000000 // ELF_32-NEXT: 0030: 01000000 diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s index 023311962189..98deb0a1de5c 100644 --- a/test/MC/ELF/cfi-signal-frame.s +++ b/test/MC/ELF/cfi-signal-frame.s @@ -23,10 +23,10 @@ g: // CHECK-NEXT: AddressAlignment: 8 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5253 00017810 +// CHECK-NEXT: 0000: 14000000 00000000 037A5253 00017810 // CHECK-NEXT: 0010: 011B0C07 08900100 10000000 1C000000 // CHECK-NEXT: 0020: 00000000 00000000 00000000 14000000 -// CHECK-NEXT: 0030: 00000000 017A5200 01781001 1B0C0708 +// CHECK-NEXT: 0030: 00000000 037A5200 01781001 1B0C0708 // CHECK-NEXT: 0040: 90010000 10000000 1C000000 00000000 // CHECK-NEXT: 0050: 00000000 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s index 9773a36a3b03..568b3159cc44 100644 --- a/test/MC/ELF/cfi-undefined.s +++ b/test/MC/ELF/cfi-undefined.s @@ -24,7 +24,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 
00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00410706 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll new file mode 100644 index 000000000000..0588fcd45da6 --- /dev/null +++ b/test/MC/ELF/cfi-version.ll @@ -0,0 +1,42 @@ +; RUN: %llc_dwarf %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2 +; RUN: %llc_dwarf %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 +; RUN: %llc_dwarf %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF34 + +; Function Attrs: nounwind +define i32 @foo() #0 { +entry: + %call = call i32 bitcast (i32 (...)* @bar to i32 ()*)(), !dbg !12 + %add = add nsw i32 %call, 1, !dbg !12 + ret i32 %add, !dbg !12 +} + +declare i32 @bar(...) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"/tmp"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", 
metadata !"foo", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @foo, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5.0 "} +!12 = metadata !{i32 2, i32 0, metadata !4, null} + +; DWARF2: .debug_frame contents: +; DWARF2: Version: 1 +; DWARF2-NEXT: Augmentation: + +; DWARF34: .debug_frame contents: +; DWARF34: Version: 3 +; DWARF34-NEXT: Augmentation: diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s index c7d438a19260..b083901c137a 100644 --- a/test/MC/ELF/cfi-window-save.s +++ b/test/MC/ELF/cfi-window-save.s @@ -26,7 +26,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 02000000 00412D00 00000000 // CHECK-NEXT: ) diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s index 05cb0ae35bd2..8662839b5274 100644 --- a/test/MC/ELF/cfi-zero-addr-delta.s +++ b/test/MC/ELF/cfi-zero-addr-delta.s @@ -30,7 +30,7 @@ f: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// 
CHECK-NEXT: 0000: 14000000 00000000 037A5200 01781001 // CHECK-NEXT: 0010: 1B0C0708 90010000 1C000000 1C000000 // CHECK-NEXT: 0020: 00000000 04000000 00410E10 410A0E08 // CHECK-NEXT: 0030: 410B0000 00000000 diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s index fd229b6064ad..21be615c5f39 100644 --- a/test/MC/ELF/cfi.s +++ b/test/MC/ELF/cfi.s @@ -234,116 +234,116 @@ f37: // CHECK-NEXT: Relocations [ // CHECK-NEXT: ] // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A4C52 00017810 +// CHECK-NEXT: 0000: 14000000 00000000 037A4C52 00017810 // CHECK-NEXT: 0010: 02031B0C 07089001 14000000 1C000000 // CHECK-NEXT: 0020: 00000000 01000000 04000000 00000000 -// CHECK-NEXT: 0030: 20000000 00000000 017A504C 52000178 +// CHECK-NEXT: 0030: 20000000 00000000 037A504C 52000178 // CHECK-NEXT: 0040: 100B0000 00000000 00000003 1B0C0708 // CHECK-NEXT: 0050: 90010000 14000000 28000000 00000000 // CHECK-NEXT: 0060: 01000000 04000000 00000000 14000000 // CHECK-NEXT: 0070: 70000000 00000000 01000000 04000000 -// CHECK-NEXT: 0080: 00000000 20000000 00000000 017A504C +// CHECK-NEXT: 0080: 00000000 20000000 00000000 037A504C // CHECK-NEXT: 0090: 52000178 100B0000 00000000 00000002 // CHECK-NEXT: 00A0: 1B0C0708 90010000 10000000 28000000 // CHECK-NEXT: 00B0: 00000000 01000000 02000000 18000000 -// CHECK-NEXT: 00C0: 00000000 017A5052 00017810 04020000 +// CHECK-NEXT: 00C0: 00000000 037A5052 00017810 04020000 // CHECK-NEXT: 00D0: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 00E0: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 00F0: 00000000 017A5052 00017810 06030000 +// CHECK-NEXT: 00F0: 00000000 037A5052 00017810 06030000 // CHECK-NEXT: 0100: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 0110: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 0120: 00000000 017A5052 00017810 0A040000 +// CHECK-NEXT: 0120: 00000000 037A5052 00017810 0A040000 // CHECK-NEXT: 0130: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 0140: 24000000 00000000 01000000 
00000000 -// CHECK-NEXT: 0150: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0150: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0160: 040A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0170: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0180: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0180: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0190: 060B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 01A0: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 01B0: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 01B0: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 01C0: 0A0C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 01D0: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 01E0: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 01E0: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 01F0: 00017810 0A080000 00000000 00001B0C // CHECK-NEXT: 0200: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 0210: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 0220: 017A5052 00017810 0A100000 00000000 +// CHECK-NEXT: 0220: 037A5052 00017810 0A100000 00000000 // CHECK-NEXT: 0230: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 0240: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0250: 00000000 017A5052 00017810 04120000 +// CHECK-NEXT: 0250: 00000000 037A5052 00017810 04120000 // CHECK-NEXT: 0260: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0270: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0280: 00000000 017A5052 00017810 06130000 +// CHECK-NEXT: 0280: 00000000 037A5052 00017810 06130000 // CHECK-NEXT: 0290: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 02A0: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 02B0: 00000000 017A5052 00017810 0A140000 +// CHECK-NEXT: 02B0: 00000000 037A5052 00017810 0A140000 // CHECK-NEXT: 02C0: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 02D0: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 02E0: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 02E0: 18000000 
00000000 037A5052 00017810 // CHECK-NEXT: 02F0: 041A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0300: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0310: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0310: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0320: 061B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 0330: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0340: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0340: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 0350: 0A1C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 0360: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0370: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0370: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 0380: 00017810 0A180000 00000000 00001B0C // CHECK-NEXT: 0390: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 03A0: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 03B0: 017A5052 00017810 0A800000 00000000 +// CHECK-NEXT: 03B0: 037A5052 00017810 0A800000 00000000 // CHECK-NEXT: 03C0: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 03D0: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 03E0: 00000000 017A5052 00017810 04820000 +// CHECK-NEXT: 03E0: 00000000 037A5052 00017810 04820000 // CHECK-NEXT: 03F0: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0400: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0410: 00000000 017A5052 00017810 06830000 +// CHECK-NEXT: 0410: 00000000 037A5052 00017810 06830000 // CHECK-NEXT: 0420: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 0430: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 0440: 00000000 017A5052 00017810 0A840000 +// CHECK-NEXT: 0440: 00000000 037A5052 00017810 0A840000 // CHECK-NEXT: 0450: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 0460: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0470: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0470: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0480: 048A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 
0490: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 04A0: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 04A0: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 04B0: 068B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 04C0: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 04D0: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 04D0: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 04E0: 0A8C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 04F0: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0500: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0500: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 0510: 00017810 0A880000 00000000 00001B0C // CHECK-NEXT: 0520: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 0530: 01000000 00000000 1C000000 00000000 -// CHECK-NEXT: 0540: 017A5052 00017810 0A900000 00000000 +// CHECK-NEXT: 0540: 037A5052 00017810 0A900000 00000000 // CHECK-NEXT: 0550: 00001B0C 07089001 10000000 24000000 // CHECK-NEXT: 0560: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 0570: 00000000 017A5052 00017810 04920000 +// CHECK-NEXT: 0570: 00000000 037A5052 00017810 04920000 // CHECK-NEXT: 0580: 1B0C0708 90010000 10000000 20000000 // CHECK-NEXT: 0590: 00000000 01000000 00000000 18000000 -// CHECK-NEXT: 05A0: 00000000 017A5052 00017810 06930000 +// CHECK-NEXT: 05A0: 00000000 037A5052 00017810 06930000 // CHECK-NEXT: 05B0: 00001B0C 07089001 10000000 20000000 // CHECK-NEXT: 05C0: 00000000 01000000 00000000 1C000000 -// CHECK-NEXT: 05D0: 00000000 017A5052 00017810 0A940000 +// CHECK-NEXT: 05D0: 00000000 037A5052 00017810 0A940000 // CHECK-NEXT: 05E0: 00000000 00001B0C 07089001 10000000 // CHECK-NEXT: 05F0: 24000000 00000000 01000000 00000000 -// CHECK-NEXT: 0600: 18000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0600: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0610: 049A0000 1B0C0708 90010000 10000000 // CHECK-NEXT: 0620: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0630: 18000000 00000000 017A5052 00017810 
+// CHECK-NEXT: 0630: 18000000 00000000 037A5052 00017810 // CHECK-NEXT: 0640: 069B0000 00001B0C 07089001 10000000 // CHECK-NEXT: 0650: 20000000 00000000 01000000 00000000 -// CHECK-NEXT: 0660: 1C000000 00000000 017A5052 00017810 +// CHECK-NEXT: 0660: 1C000000 00000000 037A5052 00017810 // CHECK-NEXT: 0670: 0A9C0000 00000000 00001B0C 07089001 // CHECK-NEXT: 0680: 10000000 24000000 00000000 01000000 -// CHECK-NEXT: 0690: 00000000 1C000000 00000000 017A5052 +// CHECK-NEXT: 0690: 00000000 1C000000 00000000 037A5052 // CHECK-NEXT: 06A0: 00017810 0A980000 00000000 00001B0C // CHECK-NEXT: 06B0: 07089001 10000000 24000000 00000000 // CHECK-NEXT: 06C0: 01000000 00000000 10000000 00000000 -// CHECK-NEXT: 06D0: 017A5200 01781001 1B000000 10000000 +// CHECK-NEXT: 06D0: 037A5200 01781001 1B000000 10000000 // CHECK-NEXT: 06E0: 18000000 00000000 01000000 00000000 // CHECK-NEXT: ) // CHECK-NEXT: } diff --git a/test/MC/Mips/cpsetup-bad.s b/test/MC/Mips/cpsetup-bad.s new file mode 100644 index 000000000000..09252a1310ed --- /dev/null +++ b/test/MC/Mips/cpsetup-bad.s @@ -0,0 +1,14 @@ +# RUN: not llvm-mc %s -triple mips64-unknown-unknown 2>%t1 +# RUN: FileCheck %s < %t1 -check-prefix=ASM + + .text + .option pic2 +t1: + .cpsetup $bar, 8, __cerror +# ASM: :[[@LINE-1]]:18: error: expected register containing function address + .cpsetup $33, 8, __cerror +# ASM: :[[@LINE-1]]:18: error: invalid register + .cpsetup $31, foo, __cerror +# ASM: :[[@LINE-1]]:23: error: expected save register or stack offset + .cpsetup $31, $32, __cerror +# ASM: :[[@LINE-1]]:23: error: invalid register diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s index 167159885d72..d6b9cf0a5405 100644 --- a/test/MC/Mips/eh-frame.s +++ b/test/MC/Mips/eh-frame.s @@ -31,7 +31,7 @@ func: // MIPS32: 00000000 // Version -// MIPS32: 01 +// MIPS32: 03 // Augmentation String // MIPS32: 7a5200 @@ -67,7 +67,7 @@ func: // MIPS32EL: 00000000 // Version -// MIPS32EL: 01 +// MIPS32EL: 03 // Augmentation String // MIPS32EL: 
7a5200 @@ -103,7 +103,7 @@ func: // MIPS64: 00000000 // Version -// MIPS64: 01 +// MIPS64: 03 // Augmentation String // MIPS64: 7a5200 @@ -141,7 +141,7 @@ func: // MIPS64EL: 00000000 // Version -// MIPS64EL: 01 +// MIPS64EL: 03 // Augmentation String // MIPS64EL: 7a5200 diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s new file mode 100644 index 000000000000..a137deb8d172 --- /dev/null +++ b/test/MC/Mips/mips-expansions-bad.s @@ -0,0 +1,6 @@ +# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1 +# RUN: FileCheck %s < %t1 + + .text + li $5, 0x100000000 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled + dli $5, 1 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s index 1622965a4139..f0a04a5a2515 100644 --- a/test/MC/Mips/mips-expansions.s +++ b/test/MC/Mips/mips-expansions.s @@ -8,6 +8,8 @@ # CHECK: addiu $6, $zero, -2345 # encoding: [0xd7,0xf6,0x06,0x24] # CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c] # CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34] +# CHECK: addiu $8, $zero, -8 # encoding: [0xf8,0xff,0x08,0x24] + # CHECK: addiu $4, $zero, 20 # encoding: [0x14,0x00,0x04,0x24] # CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c] # CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34] @@ -32,17 +34,28 @@ # CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00] # CHECK: sw $10, 57920($1) # encoding: [0x40,0xe2,0x2a,0xac] +# CHECK: lui $1, %hi(symbol) +# CHECK: ldc1 $f0, %lo(symbol)($1) +# CHECK: lui $1, %hi(symbol) +# CHECK: sdc1 $f0, %lo(symbol)($1) + li $5,123 li $6,-2345 li $7,65538 + li $8, ~7 la $a0, 20 la $7,65538 la $a0, 20($a1) la $7,65538($8) + .set noat lw $t2, symbol($a0) + .set at sw $t2, symbol($t1) lw $t2, 655483($a0) sw $t2, 123456($t1) + + ldc1 $f0, symbol + sdc1 $f0, symbol diff --git a/test/MC/Mips/mips-noat.s b/test/MC/Mips/mips-noat.s index 
b83c51780776..07db251b0506 100644 --- a/test/MC/Mips/mips-noat.s +++ b/test/MC/Mips/mips-noat.s @@ -10,11 +10,10 @@ test1: lw $2, 65536($2) -# FIXME: It would be better if the error pointed at the mnemonic instead of the newline -# ERROR: mips-noat.s:[[@LINE+4]]:1: error: Pseudo instruction requires $at, which is not available test2: .set noat - lw $2, 65536($2) + lw $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available + # Can we switch it back on successfully? # CHECK-LABEL: test3: @@ -25,10 +24,6 @@ test3: .set at lw $2, 65536($2) -# FIXME: It would be better if the error pointed at the mnemonic instead of the newline -# ERROR: mips-noat.s:[[@LINE+4]]:1: error: Pseudo instruction requires $at, which is not available test4: .set at=$0 - lw $2, 65536($2) - -# ERROR-NOT: error + lw $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available diff --git a/test/MC/Mips/mips1/invalid-mips2.s b/test/MC/Mips/mips1/invalid-mips2.s index 6c3e80ac458a..7db261d42c98 100644 --- a/test/MC/Mips/mips1/invalid-mips2.s +++ b/test/MC/Mips/mips1/invalid-mips2.s @@ -21,3 +21,4 @@ tnei $t4,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled trunc.w.d $f22,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled trunc.w.s $f28,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips1/invalid-mips32.s b/test/MC/Mips/mips1/invalid-mips32.s new file mode 100644 index 000000000000..4ad8d63eb2c8 --- /dev/null +++ b/test/MC/Mips/mips1/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < 
%t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s index aacbfe5401fd..66e11ba2fe52 100644 --- a/test/MC/Mips/mips1/valid.s +++ b/test/MC/Mips/mips1/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1t $fcc0, 4 # CHECK: bc1t 4 # encoding: [0x45,0x01,0x00,0x01] @@ -73,6 +75,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] sb $s6,-19857($14) sh $14,-6704($15) sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80] diff --git a/test/MC/Mips/mips2/invalid-mips32.s b/test/MC/Mips/mips2/invalid-mips32.s index 653d2a13110d..43ea345441c5 100644 --- a/test/MC/Mips/mips2/invalid-mips32.s +++ b/test/MC/Mips/mips2/invalid-mips32.s @@ -40,3 +40,5 @@ msubu $15,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled mtc0 $9,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s index fbbc983ecd46..9c3706ee3ff5 100644 --- a/test/MC/Mips/mips2/valid.s +++ 
b/test/MC/Mips/mips2/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1t $fcc0, 4 # CHECK: bc1t 4 # encoding: [0x45,0x01,0x00,0x01] @@ -81,6 +83,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] round.w.d $f6,$f4 round.w.s $f27,$f28 sb $s6,-19857($14) @@ -119,6 +122,7 @@ swc3 $10,-32265($k0) swl $15,13694($s3) swr $s1,-26590($14) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips3/invalid-mips32.s b/test/MC/Mips/mips3/invalid-mips32.s new file mode 100644 index 000000000000..3acd7651e629 --- /dev/null +++ b/test/MC/Mips/mips3/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s index 7bd45a8be45a..cb209fdb208f 100644 --- a/test/MC/Mips/mips3/valid.s +++ b/test/MC/Mips/mips3/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1t $fcc0, 4 # CHECK: bc1t 4 # 
encoding: [0x45,0x01,0x00,0x01] @@ -134,6 +136,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] round.l.d $f12,$f1 round.l.s $f25,$f5 round.w.d $f6,$f4 @@ -176,6 +179,7 @@ swc2 $25,24880($s0) # CHECK: swc2 $25, 24880($16) # encoding: [0xea,0x19,0x61,0x30] swl $15,13694($s3) swr $s1,-26590($14) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s index a2355ef9a65f..c58cb88b052d 100644 --- a/test/MC/Mips/mips32/valid.s +++ b/test/MC/Mips/mips32/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -107,6 +109,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.w.d $f6,$f4 round.w.s $f27,$f28 @@ -144,6 +147,8 @@ swc2 $25,24880($s0) # CHECK: swc2 $25, 24880($16) # encoding: [0xea,0x19,0x61,0x30] swl $15,13694($s3) swr $s1,-26590($14) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s index c814788744d3..e152f6f437c2 100644 --- a/test/MC/Mips/mips32r2/valid.s +++ b/test/MC/Mips/mips32r2/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # 
CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -127,6 +129,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] rdhwr $sp,$11 @@ -174,6 +177,8 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s index 96cbcc832055..e6a6a6636158 100644 --- a/test/MC/Mips/mips32r6/valid.s +++ b/test/MC/Mips/mips32r6/valid.s @@ -15,6 +15,7 @@ .set noat # FIXME: Add the instructions carried forward from older ISA's + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] addiupc $4, 100 # CHECK: addiupc $4, 100 # encoding: [0xec,0x80,0x00,0x19] align $4, $2, $3, 2 # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0] aluipc $3, 56 # CHECK: aluipc $3, 56 # encoding: [0xec,0x7f,0x00,0x38] @@ -52,6 +53,8 @@ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40] bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20] blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d] + bltc $5, $6, 256 # CHECK: bltc $5, $6, 256 # encoding: [0x5c,0xa6,0x00,0x40] + bltuc $5, $6, 256 # CHECK: bltuc $5, $6, 256 # encoding: [0x1c,0xa6,0x00,0x40] # bnvc requires that rs >= rt but we accept both. 
See also bnec bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01] bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01] @@ -97,6 +100,7 @@ divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b] jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00] jic $5, 256 # CHECK: jic $5, 256 # encoding: [0xd8,0x05,0x01,0x00] + lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5] lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43] lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43] mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda] @@ -122,6 +126,7 @@ maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f] mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] seleqz.s $f0, $f2, $f4 # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14] seleqz.d $f0, $f2, $f4 # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14] selnez.s $f0, $f2, $f4 # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17] @@ -143,3 +148,5 @@ clz $sp,$gp # CHECK: clz $sp, $gp # encoding: [0x03,0x80,0xe8,0x50] ssnop # WARNING: [[@LINE]]:9: warning: ssnop is deprecated for MIPS32r6 and is equivalent to a nop instruction ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40] + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] diff --git a/test/MC/Mips/mips4/invalid-mips32.s b/test/MC/Mips/mips4/invalid-mips32.s new file mode 100644 index 000000000000..52dea02d10c6 --- /dev/null +++ b/test/MC/Mips/mips4/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips4 \ +# 
RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s index 56e0be6fd0a1..949b91da922c 100644 --- a/test/MC/Mips/mips4/valid.s +++ b/test/MC/Mips/mips4/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -150,6 +152,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.l.d $f12,$f1 round.l.s $f25,$f5 @@ -195,6 +198,7 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips5/invalid-mips32.s b/test/MC/Mips/mips5/invalid-mips32.s new file mode 100644 index 000000000000..2e2c8da462df --- /dev/null +++ b/test/MC/Mips/mips5/invalid-mips32.s @@ -0,0 +1,10 @@ +# Instructions that are invalid +# +# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips5 \ +# RUN: 2>%t1 +# RUN: FileCheck %s < %t1 + + .set noat + + sync 0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled + sync 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s index 1505b3c0d8af..3afdee1887c1 
100644 --- a/test/MC/Mips/mips5/valid.s +++ b/test/MC/Mips/mips5/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -151,6 +153,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.l.d $f12,$f1 round.l.s $f25,$f5 @@ -197,6 +200,7 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips64-expansions.s b/test/MC/Mips/mips64-expansions.s new file mode 100644 index 000000000000..0efdd2fa5c81 --- /dev/null +++ b/test/MC/Mips/mips64-expansions.s @@ -0,0 +1,209 @@ +# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s +# +# The GNU assembler implements 'dli' and 'dla' variants on 'li' and 'la' +# supporting double-word lengths. Test that not only are they present, bu +# that they also seem to handle 64-bit values. +# +# XXXRW: Does using powers of ten make me a bad person? 
+# +# CHECK: ori $12, $zero, 1 # encoding: [0x01,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 10 # encoding: [0x0a,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 100 # encoding: [0x64,0x00,0x0c,0x34] +# CHECK: ori $12, $zero, 1000 # encoding: [0xe8,0x03,0x0c,0x34] +# CHECK: ori $12, $zero, 10000 # encoding: [0x10,0x27,0x0c,0x34] +# CHECK: lui $12, 1 # encoding: [0x01,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 34464 # encoding: [0xa0,0x86,0x8c,0x35] +# CHECK: lui $12, 15 # encoding: [0x0f,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 16960 # encoding: [0x40,0x42,0x8c,0x35] +# CHECK: lui $12, 152 # encoding: [0x98,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 38528 # encoding: [0x80,0x96,0x8c,0x35] +# CHECK: lui $12, 1525 # encoding: [0xf5,0x05,0x0c,0x3c] +# CHECK: ori $12, $12, 57600 # encoding: [0x00,0xe1,0x8c,0x35] +# CHECK: lui $12, 15258 # encoding: [0x9a,0x3b,0x0c,0x3c] +# CHECK: ori $12, $12, 51712 # encoding: [0x00,0xca,0x8c,0x35] +# CHECK: lui $12, 2 # encoding: [0x02,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 21515 # encoding: [0x0b,0x54,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 58368 # encoding: [0x00,0xe4,0x8c,0x35] +# CHECK: lui $12, 23 # encoding: [0x17,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 18550 # encoding: [0x76,0x48,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 59392 # encoding: [0x00,0xe8,0x8c,0x35] +# CHECK: lui $12, 232 # encoding: [0xe8,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 54437 # encoding: [0xa5,0xd4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 4096 # encoding: [0x00,0x10,0x8c,0x35] +# CHECK: lui $12, 2328 # encoding: [0x18,0x09,0x0c,0x3c] +# CHECK: ori $12, $12, 20082 # encoding: [0x72,0x4e,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 40960 # encoding: [0x00,0xa0,0x8c,0x35] +# CHECK: lui $12, 23283 # encoding: [0xf3,0x5a,0x0c,0x3c] +# CHECK: ori $12, $12, 
4218 # encoding: [0x7a,0x10,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 16384 # encoding: [0x00,0x40,0x8c,0x35] +# CHECK: lui $12, 3 # encoding: [0x03,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 36222 # encoding: [0x7e,0x8d,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 42182 # encoding: [0xc6,0xa4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 32768 # encoding: [0x00,0x80,0x8c,0x35] +# CHECK: lui $12, 35 # encoding: [0x23,0x00,0x0c,0x3c] +# CHECK: ori $12, $12, 34546 # encoding: [0xf2,0x86,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 28609 # encoding: [0xc1,0x6f,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 355 # encoding: [0x63,0x01,0x0c,0x3c] +# CHECK: ori $12, $12, 17784 # encoding: [0x78,0x45,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 23946 # encoding: [0x8a,0x5d,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 3552 # encoding: [0xe0,0x0d,0x0c,0x3c] +# CHECK: ori $12, $12, 46771 # encoding: [0xb3,0xb6,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 42852 # encoding: [0x64,0xa7,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 35527 # encoding: [0xc7,0x8a,0x0c,0x3c] +# CHECK: ori $12, $12, 8964 # encoding: [0x04,0x23,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 35304 # encoding: [0xe8,0x89,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: 
[0x00,0x00,0x8c,0x35] +# CHECK: addiu $12, $zero, -1 # encoding: [0xff,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -10 # encoding: [0xf6,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -100 # encoding: [0x9c,0xff,0x0c,0x24] +# CHECK: addiu $12, $zero, -1000 # encoding: [0x18,0xfc,0x0c,0x24] +# CHECK: addiu $12, $zero, -10000 # encoding: [0xf0,0xd8,0x0c,0x24] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65534 # encoding: [0xfe,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 31072 # encoding: [0x60,0x79,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65520 # encoding: [0xf0,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 48576 # encoding: [0xc0,0xbd,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 65383 # encoding: [0x67,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 27008 # encoding: [0x80,0x69,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 64010 # encoding: [0x0a,0xfa,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 7936 # encoding: [0x00,0x1f,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65535 # encoding: [0xff,0xff,0x8c,0x35] +# CHECK: dsll $12, 
$12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 50277 # encoding: [0x65,0xc4,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 13824 # encoding: [0x00,0x36,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65533 # encoding: [0xfd,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 44020 # encoding: [0xf4,0xab,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 7168 # encoding: [0x00,0x1c,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65512 # encoding: [0xe8,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 46985 # encoding: [0x89,0xb7,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 6144 # encoding: [0x00,0x18,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 65303 # encoding: [0x17,0xff,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 11098 # encoding: [0x5a,0x2b,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 61440 # encoding: [0x00,0xf0,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 63207 # encoding: [0xe7,0xf6,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 45453 # encoding: [0x8d,0xb1,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 24576 # encoding: [0x00,0x60,0x8c,0x35] +# CHECK: lui $12, 65535 # encoding: [0xff,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 42252 # encoding: [0x0c,0xa5,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 61317 # encoding: [0x85,0xef,0x8c,0x35] +# CHECK: dsll $12, 
$12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 49152 # encoding: [0x00,0xc0,0x8c,0x35] +# CHECK: lui $12, 65532 # encoding: [0xfc,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 29313 # encoding: [0x81,0x72,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 23353 # encoding: [0x39,0x5b,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 32768 # encoding: [0x00,0x80,0x8c,0x35] +# CHECK: lui $12, 65500 # encoding: [0xdc,0xff,0x0c,0x3c] +# CHECK: ori $12, $12, 30989 # encoding: [0x0d,0x79,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 36927 # encoding: [0x3f,0x90,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 65180 # encoding: [0x9c,0xfe,0x0c,0x3c] +# CHECK: ori $12, $12, 47751 # encoding: [0x87,0xba,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 41590 # encoding: [0x76,0xa2,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 61983 # encoding: [0x1f,0xf2,0x0c,0x3c] +# CHECK: ori $12, $12, 18764 # encoding: [0x4c,0x49,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 22684 # encoding: [0x9c,0x58,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] +# CHECK: lui $12, 30008 # encoding: [0x38,0x75,0x0c,0x3c] +# CHECK: ori $12, $12, 56571 # encoding: [0xfb,0xdc,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 30232 # encoding: [0x18,0x76,0x8c,0x35] +# CHECK: dsll $12, $12, 16 # encoding: [0x38,0x64,0x0c,0x00] +# CHECK: ori $12, $12, 0 # encoding: [0x00,0x00,0x8c,0x35] + + dli $t0, 1 + dli $t0, 10 + dli 
$t0, 100 + dli $t0, 1000 + dli $t0, 10000 + dli $t0, 100000 + dli $t0, 1000000 + dli $t0, 10000000 + dli $t0, 100000000 + dli $t0, 1000000000 + dli $t0, 10000000000 + dli $t0, 100000000000 + dli $t0, 1000000000000 + dli $t0, 10000000000000 + dli $t0, 100000000000000 + dli $t0, 1000000000000000 + dli $t0, 10000000000000000 + dli $t0, 100000000000000000 + dli $t0, 1000000000000000000 + dli $t0, 10000000000000000000 + dli $t0, -1 + dli $t0, -10 + dli $t0, -100 + dli $t0, -1000 + dli $t0, -10000 + dli $t0, -100000 + dli $t0, -1000000 + dli $t0, -10000000 + dli $t0, -100000000 + dli $t0, -1000000000 + dli $t0, -10000000000 + dli $t0, -100000000000 + dli $t0, -1000000000000 + dli $t0, -10000000000000 + dli $t0, -100000000000000 + dli $t0, -1000000000000000 + dli $t0, -10000000000000000 + dli $t0, -100000000000000000 + dli $t0, -1000000000000000000 + dli $t0, -10000000000000000000 diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s index f7d882a4edcf..81c22687796e 100644 --- a/test/MC/Mips/mips64/valid.s +++ b/test/MC/Mips/mips64/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -165,6 +167,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] round.l.d $f12,$f1 round.l.s $f25,$f5 @@ -211,6 +214,8 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git 
a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s index 6ee24c2acec8..3d85c137ab9f 100644 --- a/test/MC/Mips/mips64r2/valid.s +++ b/test/MC/Mips/mips64r2/valid.s @@ -9,8 +9,10 @@ add.d $f1,$f7,$f29 add.s $f8,$f21,$f24 addi $13,$9,26322 + addi $8,$8,~1 # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe] addu $9,$a0,$a2 and $s7,$v0,$12 + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] bc1f $fcc0, 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] bc1f $fcc1, 4 # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01] bc1f 4 # CHECK: bc1f 4 # encoding: [0x45,0x00,0x00,0x01] @@ -185,6 +187,7 @@ nop nor $a3,$zero,$a3 or $12,$s0,$sp + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40] pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0xcc,0xa1,0x00,0x08] rdhwr $sp,$11 @@ -238,6 +241,8 @@ swl $15,13694($s3) swr $s1,-26590($14) swxc1 $f19,$12($k0) + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] teqi $s5,-17504 tgei $s1,5025 tgeiu $sp,-28621 diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s index 1a51f8687997..1148a5246609 100644 --- a/test/MC/Mips/mips64r6/valid.s +++ b/test/MC/Mips/mips64r6/valid.s @@ -15,6 +15,7 @@ .set noat # FIXME: Add the instructions carried forward from older ISA's + and $2,4 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04] addiupc $4, 100 # CHECK: addiupc $4, 100 # encoding: [0xec,0x80,0x00,0x19] align $4, $2, $3, 2 # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0] aluipc $3, 56 # CHECK: aluipc $3, 56 # encoding: [0xec,0x7f,0x00,0x38] @@ -52,6 +53,8 @@ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40] bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20] blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d] + bltc $5, $6, 256 # CHECK: bltc $5, $6, 256 # encoding: 
[0x5c,0xa6,0x00,0x40] + bltuc $5, $6, 256 # CHECK: bltuc $5, $6, 256 # encoding: [0x1c,0xa6,0x00,0x40] # bnvc requires that rs >= rt but we accept both. See also bnec bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01] bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01] @@ -108,6 +111,8 @@ ddivu $2,$3,$4 # CHECK: ddivu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9f] dmod $2,$3,$4 # CHECK: dmod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xde] dmodu $2,$3,$4 # CHECK: dmodu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdf] + lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5] + dlsa $2, $3, $4, 3 # CHECK: dlsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xd5] ldpc $2,123456 # CHECK: ldpc $2, 123456 # encoding: [0xec,0x58,0x3c,0x48] lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43] lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43] @@ -136,6 +141,7 @@ maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f] mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] + or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] seleqz.s $f0, $f2, $f4 # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14] seleqz.d $f0, $f2, $f4 # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14] selnez.s $f0, $f2, $f4 # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17] @@ -161,3 +167,5 @@ dclz $s0,$25 # CHECK: dclz $16, $25 # encoding: [0x03,0x20,0x80,0x52] ssnop # WARNING: [[@LINE]]:9: warning: ssnop is deprecated for MIPS64r6 and is equivalent to a nop instruction ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40] + sync # CHECK: sync # encoding: [0x00,0x00,0x00,0x0f] + sync 1 # CHECK: sync 1 # encoding: [0x00,0x00,0x00,0x4f] diff --git a/test/MC/PowerPC/ppc64-initial-cfa.s 
b/test/MC/PowerPC/ppc64-initial-cfa.s index ca97e1b96b07..d0bc6b3ecd74 100644 --- a/test/MC/PowerPC/ppc64-initial-cfa.s +++ b/test/MC/PowerPC/ppc64-initial-cfa.s @@ -28,8 +28,8 @@ _proc: # STATIC-NEXT: Relocations [ # STATIC-NEXT: ] # STATIC-NEXT: SectionData ( -# STATIC-BE-NEXT: 0000: 00000010 00000000 017A5200 04784101 -# STATIC-LE-NEXT: 0000: 10000000 00000000 017A5200 04784101 +# STATIC-BE-NEXT: 0000: 00000010 00000000 037A5200 04784101 +# STATIC-LE-NEXT: 0000: 10000000 00000000 037A5200 04784101 # STATIC-BE-NEXT: 0010: 1B0C0100 00000010 00000018 00000000 # STATIC-LE-NEXT: 0010: 1B0C0100 10000000 18000000 00000000 # STATIC-BE-NEXT: 0020: 00000004 00000000 @@ -69,8 +69,8 @@ _proc: # PIC-NEXT: Relocations [ # PIC-NEXT: ] # PIC-NEXT: SectionData ( -# PIC-BE-NEXT: 0000: 00000010 00000000 017A5200 04784101 -# PIC-LE-NEXT: 0000: 10000000 00000000 017A5200 04784101 +# PIC-BE-NEXT: 0000: 00000010 00000000 037A5200 04784101 +# PIC-LE-NEXT: 0000: 10000000 00000000 037A5200 04784101 # PIC-BE-NEXT: 0010: 1B0C0100 00000010 00000018 00000000 # PIC-LE-NEXT: 0010: 1B0C0100 10000000 18000000 00000000 # PIC-BE-NEXT: 0020: 00000004 00000000 diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index f38eaf5402fe..b9674231d950 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -3159,3 +3159,19 @@ vmovntdqa 0x12345678(%rbx), %zmm13 // CHECK: vmovntdqa // CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x14,0x56] vmovntdqa (%r14,%rdx,2), %zmm18 + +// CHECK: vmovntdqa +// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02] +vmovntdqa 128(%r12,%rdx), %zmm23 + +// CHECK: vmovntdq +// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x24,0xa9] +vmovntdq %zmm28, (%rcx,%r13,4) + +// CHECK: vmovntpd +// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x2b,0xb2,0x04,0x00,0x00,0x00] +vmovntpd %zmm6, 4(%rdx) + +// CHECK: vmovntps +// CHECK: encoding: [0x62,0x51,0x7c,0x48,0x2b,0x5c,0x8d,0x00] +vmovntps %zmm11, (%r13,%rcx,4) diff --git 
a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 540282a74c73..796891880b12 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -599,3 +599,11 @@ fxrstor64 opaque ptr [rax] // CHECK: movq _g0+8, %rcx mov rbx, qword ptr [_g0] mov rcx, qword ptr [_g0 + 8] + +"?half@?0??bar@@YAXXZ@4NA": + .quad 4602678819172646912 + +fadd "?half@?0??bar@@YAXXZ@4NA" +fadd "?half@?0??bar@@YAXXZ@4NA"@IMGREL +// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA" +// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"@IMGREL32 diff --git a/test/Object/Inputs/hello-world.macho-x86_64 b/test/Object/Inputs/hello-world.macho-x86_64 new file mode 100755 index 000000000000..d004bedf6ad4 Binary files /dev/null and b/test/Object/Inputs/hello-world.macho-x86_64 differ diff --git a/test/Object/Inputs/macho-archive-x86_64.a b/test/Object/Inputs/macho-archive-x86_64.a new file mode 100644 index 000000000000..9979ba9dd1e3 Binary files /dev/null and b/test/Object/Inputs/macho-archive-x86_64.a differ diff --git a/test/Object/nm-darwin-m.test b/test/Object/nm-darwin-m.test index 6c718128aa61..5bb19dcacd37 100644 --- a/test/Object/nm-darwin-m.test +++ b/test/Object/nm-darwin-m.test @@ -2,7 +2,7 @@ RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test1.mach0-armv7 \ RUN: | FileCheck %s -check-prefix test1 RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test2.macho-i386 \ RUN: | FileCheck %s -check-prefix test2 -RUN: llvm-nm -format darwin %p/Inputs/darwin-m-test3.macho-x86-64 \ +RUN: llvm-nm -m %p/Inputs/darwin-m-test3.macho-x86-64 \ RUN: | FileCheck %s -check-prefix test3 # This is testing that the various bits in the n_desc feild are correct diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index 20ac6621e728..c53dc91ecd4c 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -14,6 +14,8 @@ RUN: llvm-nm %p/Inputs/trivial-object-test.macho-i386 \ RUN: | FileCheck %s -check-prefix macho RUN: llvm-nm 
%p/Inputs/trivial-object-test.macho-x86-64 \ RUN: | FileCheck %s -check-prefix macho64 +RUN: llvm-nm %p/Inputs/macho-text-data-bss.macho-x86_64 \ +RUN: | FileCheck %s -check-prefix macho-tdb RUN: llvm-nm %p/Inputs/common.coff-i386 \ RUN: | FileCheck %s -check-prefix COFF-COMMON RUN: llvm-nm %p/Inputs/relocatable-with-section-address.elf-x86-64 \ @@ -64,6 +66,11 @@ macho64: U _SomeOtherFunction macho64: 0000000000000000 T _main macho64: U _puts +macho-tdb: 0000000000000030 s EH_frame0 +macho-tdb: 0000000000000070 b _b +macho-tdb: 000000000000000c D _d +macho-tdb: 0000000000000000 T _t +macho-tdb: 0000000000000048 S _t.eh Test that nm uses addresses even with ELF .o files. ELF-SEC-ADDR64: 0000000000000058 D a diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test index c20c733dcd8b..8992359a79b6 100644 --- a/test/Object/nm-universal-binary.test +++ b/test/Object/nm-universal-binary.test @@ -3,17 +3,17 @@ RUN: | FileCheck %s -check-prefix CHECK-OBJ RUN: llvm-nm %p/Inputs/macho-universal-archive.x86_64.i386 \ RUN: | FileCheck %s -check-prefix CHECK-AR -CHECK-OBJ: macho-universal.x86_64.i386:x86_64 +CHECK-OBJ: macho-universal.x86_64.i386 (for architecture x86_64): CHECK-OBJ: 0000000100000f60 T _main -CHECK-OBJ: macho-universal.x86_64.i386:i386 +CHECK-OBJ: macho-universal.x86_64.i386 (for architecture i386): CHECK-OBJ: 00001fa0 T _main -CHECK-AR: macho-universal-archive.x86_64.i386:x86_64:hello.o: +CHECK-AR: macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64): CHECK-AR: 0000000000000068 s EH_frame0 CHECK-AR: 000000000000003b s L_.str CHECK-AR: 0000000000000000 T _main CHECK-AR: 0000000000000080 S _main.eh CHECK-AR: U _printf -CHECK-AR: macho-universal-archive.x86_64.i386:i386:foo.o: -CHECK-AR: 00000008 S _bar +CHECK-AR: macho-universal-archive.x86_64.i386(foo.o) (for architecture i386): +CHECK-AR: 00000008 D _bar CHECK-AR: 00000000 T _foo diff --git a/test/Object/size-trivial-macho.test 
b/test/Object/size-trivial-macho.test index 6ecdf5c2a8c4..1642790c2c7b 100644 --- a/test/Object/size-trivial-macho.test +++ b/test/Object/size-trivial-macho.test @@ -2,6 +2,18 @@ RUN: llvm-size -A %p/Inputs/macho-text-data-bss.macho-x86_64 \ RUN: | FileCheck %s -check-prefix A RUN: llvm-size -B %p/Inputs/macho-text-data-bss.macho-x86_64 \ RUN: | FileCheck %s -check-prefix B +RUN: llvm-size -format darwin %p/Inputs/macho-text-data-bss.macho-x86_64 \ +RUN: | FileCheck %s -check-prefix m +RUN: llvm-size %p/Inputs/macho-archive-x86_64.a \ +RUN: | FileCheck %s -check-prefix AR +RUN: llvm-size -format darwin %p/Inputs/macho-archive-x86_64.a \ +RUN: | FileCheck %s -check-prefix mAR +RUN: llvm-size -m -x -l %p/Inputs/hello-world.macho-x86_64 \ +RUN: | FileCheck %s -check-prefix mxl +RUN: llvm-size %p/Inputs/macho-universal.x86_64.i386 \ +RUN: | FileCheck %s -check-prefix u +RUN: llvm-size %p/Inputs/macho-universal-archive.x86_64.i386 \ +RUN: | FileCheck %s -check-prefix uAR A: section size addr A: __text 12 0 @@ -11,5 +23,57 @@ A: __compact_unwind 32 16 A: __eh_frame 64 48 A: Total 116 -B: text data bss dec hex filename -B: 12 100 4 116 74 +B: __TEXT __DATA __OBJC others dec hex +B: 76 8 0 32 116 74 + +m: Segment : 116 +m: Section (__TEXT, __text): 12 +m: Section (__DATA, __data): 4 +m: Section (__DATA, __bss): 4 +m: Section (__LD, __compact_unwind): 32 +m: Section (__TEXT, __eh_frame): 64 +m: total 116 +m: total 116 + +AR: __TEXT __DATA __OBJC others dec hex +AR: 70 0 0 32 102 66 {{.*}}/macho-archive-x86_64.a(foo.o) +AR: 0 4 0 0 4 4 {{.*}}/macho-archive-x86_64.a(bar.o) + +mAR: {{.*}}/macho-archive-x86_64.a(foo.o): +mAR: Segment : 104 +mAR: Section (__TEXT, __text): 6 +mAR: Section (__LD, __compact_unwind): 32 +mAR: Section (__TEXT, __eh_frame): 64 +mAR: total 102 +mAR: total 104 +mAR: {{.*}}/macho-archive-x86_64.a(bar.o): +mAR: Segment : 4 +mAR: Section (__TEXT, __text): 0 +mAR: Section (__DATA, __data): 4 +mAR: total 4 +mAR: total 4 + + +mxl: Segment __PAGEZERO: 
0x100000000 (vmaddr 0x0 fileoff 0) +mxl: Segment __TEXT: 0x1000 (vmaddr 0x100000000 fileoff 0) +mxl: Section __text: 0x3b (addr 0x100000f30 offset 3888) +mxl: Section __stubs: 0x6 (addr 0x100000f6c offset 3948) +mxl: Section __stub_helper: 0x1a (addr 0x100000f74 offset 3956) +mxl: Section __cstring: 0xd (addr 0x100000f8e offset 3982) +mxl: Section __unwind_info: 0x48 (addr 0x100000f9b offset 3995) +mxl: Section __eh_frame: 0x18 (addr 0x100000fe8 offset 4072) +mxl: total 0xc8 +mxl: Segment __DATA: 0x1000 (vmaddr 0x100001000 fileoff 4096) +mxl: Section __nl_symbol_ptr: 0x10 (addr 0x100001000 offset 4096) +mxl: Section __la_symbol_ptr: 0x8 (addr 0x100001010 offset 4112) +mxl: total 0x18 +mxl: Segment __LINKEDIT: 0x1000 (vmaddr 0x100002000 fileoff 8192) +mxl: total 0x100003000 + +u: __TEXT __DATA __OBJC others dec hex +u: 4096 0 0 4294971392 4294975488 100002000 {{.*}}/macho-universal.x86_64.i386 (for architecture x86_64) +u: 4096 0 0 8192 12288 3000 {{.*}}/macho-universal.x86_64.i386 (for architecture i386) + +uAR: __TEXT __DATA __OBJC others dec hex +uAR: 136 0 0 32 168 a8 {{.*}}/macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64) +uAR: 5 4 0 0 9 9 {{.*}}/macho-universal-archive.x86_64.i386(foo.o) (for architecture i386) diff --git a/test/Transforms/InstCombine/AddOverFlow.ll b/test/Transforms/InstCombine/AddOverFlow.ll index 70178ec83b6e..8f3d429c8d60 100644 --- a/test/Transforms/InstCombine/AddOverFlow.ll +++ b/test/Transforms/InstCombine/AddOverFlow.ll @@ -34,6 +34,44 @@ define i16 @zero_sign_bit2(i16 %a, i16 %b) { ret i16 %3 } +declare i16 @bounded(i16 %input); +declare i32 @__gxx_personality_v0(...); +!0 = metadata !{i16 0, i16 32768} ; [0, 32767] +!1 = metadata !{i16 0, i16 32769} ; [0, 32768] + +define i16 @add_bounded_values(i16 %a, i16 %b) { +; CHECK-LABEL: @add_bounded_values( +entry: + %c = call i16 @bounded(i16 %a), !range !0 + %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !0 +cont: +; %c and %d are in [0, 
32767]. Therefore, %c + %d doesn't unsigned overflow. + %e = add i16 %c, %d +; CHECK: add nuw i16 %c, %d + ret i16 %e +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret i16 42 +} + +define i16 @add_bounded_values_2(i16 %a, i16 %b) { +; CHECK-LABEL: @add_bounded_values_2( +entry: + %c = call i16 @bounded(i16 %a), !range !1 + %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !1 +cont: +; Similar to add_bounded_values, but %c and %d are in [0, 32768]. Therefore, +; %c + %d may unsigned overflow and we cannot add NUW. + %e = add i16 %c, %d +; CHECK: add i16 %c, %d + ret i16 %e +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret i16 42 +} + ; CHECK-LABEL: @ripple_nsw1 ; CHECK: add nsw i16 %a, %b define i16 @ripple_nsw1(i16 %x, i16 %y) { diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll index aaf3a7a235bf..821fce0dabcc 100644 --- a/test/Transforms/InstCombine/add2.ll +++ b/test/Transforms/InstCombine/add2.ll @@ -86,3 +86,175 @@ define i16 @test9(i16 %a) { ; CHECK-NEXT: %d = mul i16 %a, -32767 ; CHECK-NEXT: ret i16 %d } + +define i32 @test10(i32 %x) { + %x.not = or i32 %x, -1431655766 + %neg = xor i32 %x.not, 1431655765 + %add = add i32 %x, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -1431655766 +; CHECK-NEXT: ret i32 [[AND]] +} + +define i32 @test11(i32 %x, i32 %y) { + %x.not = or i32 %x, -1431655766 + %neg = xor i32 %x.not, 1431655765 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655765 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test12(i32 %x, i32 %y) { + %shr = ashr i32 %x, 3 + %shr.not = or 
i32 %shr, -1431655766 + %neg = xor i32 %shr.not, 1431655765 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3 +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655765 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test13(i32 %x, i32 %y) { + %x.not = or i32 %x, -1431655767 + %neg = xor i32 %x.not, 1431655766 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test13( +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 1431655766 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i32 @test14(i32 %x, i32 %y) { + %shr = ashr i32 %x, 3 + %shr.not = or i32 %shr, -1431655767 + %neg = xor i32 %shr.not, 1431655766 + %add = add i32 %y, 1 + %add1 = add i32 %add, %neg + ret i32 %add1 +; CHECK-LABEL: @test14( +; CHECK-NEXT: [[SHR:%[a-z0-9]+]] = ashr i32 %x, 3 +; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHR]], 1431655766 +; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = sub i32 %y, [[AND]] +; CHECK-NEXT: ret i32 [[SUB]] +} + +define i16 @add_nsw_mul_nsw(i16 %x) { + %add1 = add nsw i16 %x, %x + %add2 = add nsw i16 %add1, %x + ret i16 %add2 +; CHECK-LABEL: @add_nsw_mul_nsw( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 3 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_1(i16 %x) { + %mul1 = mul nsw i16 %x, 8 + %add2 = add nsw i16 %x, %mul1 + ret i16 %add2 +; CHECK-LABEL: @mul_add_to_mul_1( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 9 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_2(i16 %x) { + %mul1 = mul nsw i16 %x, 8 + %add2 = add nsw i16 %mul1, %x + ret i16 %add2 +; CHECK-LABEL: @mul_add_to_mul_2( +; CHECK-NEXT: %add2 = mul nsw i16 %x, 9 +; CHECK-NEXT: ret i16 %add2 +} + +define i16 @mul_add_to_mul_3(i16 %a) { + %mul1 = mul i16 %a, 2 + %mul2 = mul i16 %a, 3 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_3( 
+; CHECK-NEXT: %add = mul i16 %a, 5 +; CHECK-NEXT: ret i16 %add +} + +define i16 @mul_add_to_mul_4(i16 %a) { + %mul1 = mul nsw i16 %a, 2 + %mul2 = mul nsw i16 %a, 7 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_4( +; CHECK-NEXT: %add = mul nsw i16 %a, 9 +; CHECK-NEXT: ret i16 %add +} + +define i16 @mul_add_to_mul_5(i16 %a) { + %mul1 = mul nsw i16 %a, 3 + %mul2 = mul nsw i16 %a, 7 + %add = add nsw i16 %mul1, %mul2 + ret i16 %add +; CHECK-LABEL: @mul_add_to_mul_5( +; CHECK-NEXT: %add = mul nsw i16 %a, 10 +; CHECK-NEXT: ret i16 %add +} + +define i32 @mul_add_to_mul_6(i32 %x, i32 %y) { + %mul1 = mul nsw i32 %x, %y + %mul2 = mul nsw i32 %mul1, 5 + %add = add nsw i32 %mul1, %mul2 + ret i32 %add +; CHECK-LABEL: @mul_add_to_mul_6( +; CHECK-NEXT: %mul1 = mul nsw i32 %x, %y +; CHECK-NEXT: %add = mul nsw i32 %mul1, 6 +; CHECK-NEXT: ret i32 %add +} + +; This test and the next test verify that when a range metadata is attached to +; llvm.cttz, ValueTracking correctly intersects the range specified by the +; metadata and the range implied by the intrinsic. +; +; In this test, the range specified by the metadata is more strict. Therefore, +; ValueTracking uses that range. +define i16 @add_cttz(i16 %a) { +; CHECK-LABEL: @add_cttz( + ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned + ; is in [0, 16). The range metadata indicates the value returned is in [0, 8). + ; Intersecting these ranges, we know the value returned is in [0, 8). + ; Therefore, InstCombine will transform + ; add %cttz, 1111 1111 1111 1000 ; decimal -8 + ; to + ; or %cttz, 1111 1111 1111 1000 + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !0 + %b = add i16 %cttz, -8 +; CHECK: or i16 %cttz, -8 + ret i16 %b +} +declare i16 @llvm.cttz.i16(i16, i1) +!0 = metadata !{i16 0, i16 8} + +; Similar to @add_cttz, but in this test, the range implied by the +; intrinsic is more strict. Therefore, ValueTracking uses that range. 
+define i16 @add_cttz_2(i16 %a) { +; CHECK-LABEL: @add_cttz_2( + ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned + ; is in [0, 16). The range metadata indicates the value returned is in + ; [0, 32). Intersecting these ranges, we know the value returned is in + ; [0, 16). Therefore, InstCombine will transform + ; add %cttz, 1111 1111 1111 0000 ; decimal -16 + ; to + ; or %cttz, 1111 1111 1111 0000 + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !1 + %b = add i16 %cttz, -16 +; CHECK: or i16 %cttz, -16 + ret i16 %b +} +!1 = metadata !{i16 0, i16 32} diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll index 5bca46d5a21d..f6a877684ce4 100644 --- a/test/Transforms/InstCombine/align-2d-gep.ll +++ b/test/Transforms/InstCombine/align-2d-gep.ll @@ -31,7 +31,7 @@ bb1: store <2 x double>, <2 x double>* %r, align 8 %indvar.next = add i64 %j, 2 - %exitcond = icmp eq i64 %indvar.next, 557 + %exitcond = icmp eq i64 %indvar.next, 556 br i1 %exitcond, label %bb11, label %bb1 bb11: diff --git a/test/Transforms/InstCombine/pr20079.ll b/test/Transforms/InstCombine/pr20079.ll new file mode 100644 index 000000000000..3c86ecc5f30b --- /dev/null +++ b/test/Transforms/InstCombine/pr20079.ll @@ -0,0 +1,13 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s +@b = internal global [1 x i32] zeroinitializer, align 4 +@c = internal global i32 0, align 4 + +; CHECK-LABEL: @fn1 +; CHECK: [[ADD:%.*]] = add i32 %a, -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], sub (i32 0, i32 zext (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @b, i64 0, i64 0), i32* @c) to i32)) +; CHECK-NEXT: ret i32 [[AND]] +define i32 @fn1(i32 %a) { + %xor = add i32 %a, -1 + %mul = mul nsw i32 %xor, zext (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @b, i64 0, i64 0), i32* @c) to i32) + ret i32 %mul +} diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll new file mode 
100644 index 000000000000..1db6b0d28bf5 --- /dev/null +++ b/test/Transforms/InstCombine/r600-intrinsics.ll @@ -0,0 +1,47 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone +declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone + +; CHECK-LABEL: @test_constant_fold_rcp_f32_1 +; CHECK-NEXT: ret float 1.000000e+00 +define float @test_constant_fold_rcp_f32_1() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_1 +; CHECK-NEXT: ret double 1.000000e+00 +define double @test_constant_fold_rcp_f64_1() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_half +; CHECK-NEXT: ret float 2.000000e+00 +define float @test_constant_fold_rcp_f32_half() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_half +; CHECK-NEXT: ret double 2.000000e+00 +define double @test_constant_fold_rcp_f64_half() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone + ret double %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f32_43 +; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) +define float @test_constant_fold_rcp_f32_43() nounwind { + %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone + ret float %val +} + +; CHECK-LABEL: @test_constant_fold_rcp_f64_43 +; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) +define double @test_constant_fold_rcp_f64_43() nounwind { + %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone + ret double %val +} + diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll new file mode 100644 index 000000000000..5d314db7133d --- /dev/null +++ 
b/test/Transforms/InstSimplify/apint-or.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -instsimplify -S | not grep or + +; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0. +define i39 @test1(i39 %V, i39 %M) { + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943 + %N = and i39 %M, 274877906944 + %A = add i39 %V, %N + %B = and i39 %A, %C1 + %D = and i39 %V, 274877906943 + %R = or i39 %B, %D + ret i39 %R +; CHECK-LABEL @test1 +; CHECK-NEXT: and {{.*}}, -274877906944 +; CHECK-NEXT: add +; CHECK-NEXT: ret +} + +; Test the case where Integer BitWidth > 64 && BitWidth <= 1024. +define i399 @test2(i399 %V, i399 %M) { + ;; If we have: ((V + N) & C1) | (V & C2) + ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + ;; replace with V+N. + %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943 + %N = and i399 %M, 18446742974197923840 + %A = add i399 %V, %N + %B = and i399 %A, %C1 + %D = and i399 %V, 274877906943 + %R = or i399 %B, %D + ret i399 %R +; CHECK-LABEL @test2 +; CHECK-NEXT: and {{.*}}, 18446742974197923840 +; CHECK-NEXT: add +; CHECK-NEXT: ret +} diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index 105e244ed8cf..89fd6362d78d 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -883,3 +883,22 @@ define i1 @returns_nonnull() { ; CHECK: ret i1 false } +; If a bit is known to be zero for A and known to be one for B, +; then A and B cannot be equal. 
+define i1 @icmp_eq_const(i32 %a) nounwind { + %b = mul nsw i32 %a, -2 + %c = icmp eq i32 %b, 1 + ret i1 %c + +; CHECK-LABEL: @icmp_eq_const +; CHECK-NEXT: ret i1 false +} + +define i1 @icmp_ne_const(i32 %a) nounwind { + %b = mul nsw i32 %a, -2 + %c = icmp ne i32 %b, 1 + ret i1 %c + +; CHECK-LABEL: @icmp_ne_const +; CHECK-NEXT: ret i1 true +} diff --git a/test/Transforms/JumpThreading/pr15851_hang.ll b/test/Transforms/JumpThreading/pr15851_hang.ll new file mode 100644 index 000000000000..0484bc9f9dc9 --- /dev/null +++ b/test/Transforms/JumpThreading/pr15851_hang.ll @@ -0,0 +1,22 @@ +; RUN: opt -S -jump-threading < %s | FileCheck %s + +; CHECK-LABEL: @f( +; CHECK-LABEL: entry +; CHECK: ret void +; CHECK-NOT: for.cond1 +; CHECK-NOT: for.body + +define void @f() { +entry: + ret void + +for.cond1: + %i.025 = phi i32 [ %inc, %for.body ], [ %inc, %for.body ], [ 1, %for.cond1 ] + %cmp = icmp slt i32 %i.025, 2 + br i1 %cmp, label %for.body, label %for.cond1 + +for.body: + %inc = add nsw i32 %i.025, 0 + %a = icmp ugt i32 %inc, 2 + br i1 %a, label %for.cond1, label %for.cond1 +} diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll index 201e604e0c5e..545e86c082f4 100644 --- a/test/Transforms/JumpThreading/select.ll +++ b/test/Transforms/JumpThreading/select.ll @@ -127,7 +127,7 @@ L4: ; CHECK: test_switch_default ; CHECK: entry: ; CHECK: load -; CHECK: switch +; CHECK: icmp ; CHECK: [[THREADED:[A-Za-z.0-9]+]]: ; CHECK: store ; CHECK: br diff --git a/test/Transforms/SLPVectorizer/X86/addsub.ll b/test/Transforms/SLPVectorizer/X86/addsub.ll new file mode 100644 index 000000000000..8303bc8181ef --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/addsub.ll @@ -0,0 +1,181 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@b = common global [4 x i32] zeroinitializer, align 16 +@c = common global [4 x i32] 
zeroinitializer, align 16 +@d = common global [4 x i32] zeroinitializer, align 16 +@e = common global [4 x i32] zeroinitializer, align 16 +@a = common global [4 x i32] zeroinitializer, align 16 +@fb = common global [4 x float] zeroinitializer, align 16 +@fc = common global [4 x float] zeroinitializer, align 16 +@fa = common global [4 x float] zeroinitializer, align 16 + +; CHECK-LABEL: @addsub +; CHECK: %5 = add <4 x i32> %3, %4 +; CHECK: %6 = add <4 x i32> %2, %5 +; CHECK: %7 = sub <4 x i32> %2, %5 +; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> + +; Function Attrs: nounwind uwtable +define void @addsub() #0 { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 + %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %add1 = add nsw i32 %2, %3 + %add2 = add nsw i32 %add, %add1 + store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %add3 = add nsw i32 %4, %5 + %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %add4 = add nsw i32 %6, %7 + %sub = sub nsw i32 %add3, %add4 + store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 + %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %add5 = add nsw i32 %8, %9 + %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %add6 = add nsw 
i32 %10, %11 + %add7 = add nsw i32 %add5, %add6 + store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 + %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %add8 = add nsw i32 %12, %13 + %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 + %add9 = add nsw i32 %14, %15 + %sub10 = sub nsw i32 %add8, %add9 + store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @subadd +; CHECK: %5 = add <4 x i32> %3, %4 +; CHECK: %6 = sub <4 x i32> %2, %5 +; CHECK: %7 = add <4 x i32> %2, %5 +; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> + +; Function Attrs: nounwind uwtable +define void @subadd() #0 { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 + %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %add1 = add nsw i32 %2, %3 + %sub = sub nsw i32 %add, %add1 + store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %add2 = add nsw i32 %4, %5 + %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %add3 = add nsw i32 %6, %7 + %add4 = add nsw i32 %add2, %add3 + store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 + %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = 
load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %add5 = add nsw i32 %8, %9 + %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %add6 = add nsw i32 %10, %11 + %sub7 = sub nsw i32 %add5, %add6 + store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 + %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %add8 = add nsw i32 %12, %13 + %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 + %add9 = add nsw i32 %14, %15 + %add10 = add nsw i32 %add8, %add9 + store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @faddfsub +; CHECK: %2 = fadd <4 x float> %0, %1 +; CHECK: %3 = fsub <4 x float> %0, %1 +; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> +; Function Attrs: nounwind uwtable +define void @faddfsub() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %add = fadd float %0, %1 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %sub = fsub float %2, %3 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %add1 = fadd float %4, %5 + store float %add1, float* getelementptr 
inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %sub2 = fsub float %6, %7 + store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @fsubfadd +; CHECK: %2 = fsub <4 x float> %0, %1 +; CHECK: %3 = fadd <4 x float> %0, %1 +; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> +; Function Attrs: nounwind uwtable +define void @fsubfadd() #0 { +entry: + %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %sub = fsub float %0, %1 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %add = fadd float %2, %3 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %sub1 = fsub float %4, %5 + store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %add2 = fadd float %6, %7 + store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +; CHECK-LABEL: @No_faddfsub +; CHECK-NOT: fadd <4 x float> +; CHECK-NOT: fsub <4 x float> +; CHECK-NOT: shufflevector +; Function Attrs: nounwind uwtable +define void @No_faddfsub() #0 { +entry: + %0 = load float* 
getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %add = fadd float %0, %1 + store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %add1 = fadd float %2, %3 + store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %add2 = fadd float %4, %5 + store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %sub = fsub float %6, %7 + store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + ret void +} + +attributes #0 = { nounwind } + diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index 81079b1aa5ab..ee63d2c0c0f6 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -918,3 +918,55 @@ return: ; CHECK: switch i32 ; CHECK-NOT: @switch.table } + +; Don't build tables for switches with TLS variables. 
+@tls_a = thread_local global i32 0 +@tls_b = thread_local global i32 0 +@tls_c = thread_local global i32 0 +@tls_d = thread_local global i32 0 +define i32* @tls(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return +return: + %retval.0 = phi i32* [ @tls_d, %sw.default ], [ @tls_c, %sw.bb2 ], [ @tls_b, %sw.bb1 ], [ @tls_a, %entry ] + ret i32* %retval.0 +; CHECK-LABEL: @tls( +; CHECK: switch i32 +; CHECK-NOT: @switch.table +} + +; Don't build tables for switches with dllimport variables. +@dllimport_a = external dllimport global i32 +@dllimport_b = external dllimport global i32 +@dllimport_c = external dllimport global i32 +@dllimport_d = external dllimport global i32 +define i32* @dllimport(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +sw.bb1: + br label %return +sw.bb2: + br label %return +sw.default: + br label %return +return: + %retval.0 = phi i32* [ @dllimport_d, %sw.default ], [ @dllimport_c, %sw.bb2 ], [ @dllimport_b, %sw.bb1 ], [ @dllimport_a, %entry ] + ret i32* %retval.0 +; CHECK-LABEL: @dllimport( +; CHECK: switch i32 +; CHECK-NOT: @switch.table +} diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll index b6a75d13bba0..f15ca3f74065 100644 --- a/test/Verifier/range-1.ll +++ b/test/Verifier/range-1.ll @@ -6,7 +6,7 @@ entry: ret void } !0 = metadata !{i8 0, i8 1} -; CHECK: Ranges are only for loads! +; CHECK: Ranges are only for loads, calls and invokes! 
; CHECK-NEXT: store i8 0, i8* %x, align 1, !range !0 define i8 @f2(i8* %x) { diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll index 8d85d1915195..1d2e0575d76a 100644 --- a/test/Verifier/range-2.ll +++ b/test/Verifier/range-2.ll @@ -34,3 +34,33 @@ entry: ret i8 %y } !4 = metadata !{i8 -1, i8 0, i8 1, i8 -2} + +; We can annotate the range of the return value of a CALL. +define void @call_all(i8* %x) { +entry: + %v1 = call i8 @f1(i8* %x), !range !0 + %v2 = call i8 @f2(i8* %x), !range !1 + %v3 = call i8 @f3(i8* %x), !range !2 + %v4 = call i8 @f4(i8* %x), !range !3 + %v5 = call i8 @f5(i8* %x), !range !4 + ret void +} + +; We can annotate the range of the return value of an INVOKE. +define void @invoke_all(i8* %x) { +entry: + %v1 = invoke i8 @f1(i8* %x) to label %cont unwind label %lpad, !range !0 + %v2 = invoke i8 @f2(i8* %x) to label %cont unwind label %lpad, !range !1 + %v3 = invoke i8 @f3(i8* %x) to label %cont unwind label %lpad, !range !2 + %v4 = invoke i8 @f4(i8* %x) to label %cont unwind label %lpad, !range !3 + %v5 = invoke i8 @f5(i8* %x) to label %cont unwind label %lpad, !range !4 + +cont: + ret void + +lpad: + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret void +} +declare i32 @__gxx_personality_v0(...) 
diff --git a/test/tools/llvm-cov/copy_block_helper.m b/test/tools/llvm-cov/copy_block_helper.m index 1859b883775d..64973f11009a 100644 --- a/test/tools/llvm-cov/copy_block_helper.m +++ b/test/tools/llvm-cov/copy_block_helper.m @@ -29,4 +29,4 @@ void test(id x) { // GCOV: -: [[@LINE]]:void test int main(int argc, const char *argv[]) { test(0); } // llvm-cov doesn't work on big endian yet -// XFAIL: powerpc64, s390x, mips-, mips64-, sparc +// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc diff --git a/test/tools/llvm-cov/llvm-cov.test b/test/tools/llvm-cov/llvm-cov.test index 2345f8d51e80..82d1273ae61f 100644 --- a/test/tools/llvm-cov/llvm-cov.test +++ b/test/tools/llvm-cov/llvm-cov.test @@ -110,4 +110,4 @@ RUN: not llvm-cov test.c -gcda=test_file_checksum_fail.gcda # Bad function checksum on gcda RUN: not llvm-cov test.c -gcda=test_func_checksum_fail.gcda -XFAIL: powerpc64, s390x, mips-, mips64-, sparc +XFAIL: powerpc64-, s390x, mips-, mips64-, sparc diff --git a/test/tools/llvm-cov/range_based_for.cpp b/test/tools/llvm-cov/range_based_for.cpp index 61f60f6b0d65..3fdb2441399c 100644 --- a/test/tools/llvm-cov/range_based_for.cpp +++ b/test/tools/llvm-cov/range_based_for.cpp @@ -26,4 +26,4 @@ int main(int argc, const char *argv[]) { // GCOV: 1: [[@LINE]]:int main( } // GCOV: -: [[@LINE]]:} // llvm-cov doesn't work on big endian yet -// XFAIL: powerpc64, s390x, mips-, mips64-, sparc +// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc diff --git a/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el b/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el new file mode 100755 index 000000000000..3afc567f85d5 Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/got-tls.so.elf-mips64el differ diff --git a/test/tools/llvm-readobj/mips-got.test b/test/tools/llvm-readobj/mips-got.test new file mode 100644 index 000000000000..76db3c845eb4 --- /dev/null +++ b/test/tools/llvm-readobj/mips-got.test @@ -0,0 +1,306 @@ +RUN: llvm-readobj -mips-plt-got 
%p/Inputs/relocs.obj.elf-mips | \ +RUN: FileCheck %s -check-prefix GOT-OBJ +RUN: llvm-readobj -mips-plt-got %p/Inputs/dynamic-table-exe.mips | \ +RUN: FileCheck %s -check-prefix GOT-EXE +RUN: llvm-readobj -mips-plt-got %p/Inputs/dynamic-table-so.mips | \ +RUN: FileCheck %s -check-prefix GOT-SO +RUN: llvm-readobj -mips-plt-got %p/Inputs/got-tls.so.elf-mips64el | \ +RUN: FileCheck %s -check-prefix GOT-TLS + +GOT-OBJ: Cannot find PLTGOT dynamic table tag. + +GOT-EXE: Primary GOT { +GOT-EXE-NEXT: Canonical gp value: 0x418880 +GOT-EXE-NEXT: Reserved entries [ +GOT-EXE-NEXT: Entry { +GOT-EXE-NEXT: Address: 0x410890 +GOT-EXE-NEXT: Access: -32752 +GOT-EXE-NEXT: Initial: 0x0 +GOT-EXE-NEXT: Purpose: Lazy resolver +GOT-EXE-NEXT: } +GOT-EXE-NEXT: Entry { +GOT-EXE-NEXT: Address: 0x410894 +GOT-EXE-NEXT: Access: -32748 +GOT-EXE-NEXT: Initial: 0x80000000 +GOT-EXE-NEXT: Purpose: Module pointer (GNU extension) +GOT-EXE-NEXT: } +GOT-EXE-NEXT: ] +GOT-EXE-NEXT: Local entries [ +GOT-EXE-NEXT: Entry { +GOT-EXE-NEXT: Address: 0x410898 +GOT-EXE-NEXT: Access: -32744 +GOT-EXE-NEXT: Initial: 0x400418 +GOT-EXE-NEXT: } +GOT-EXE-NEXT: Entry { +GOT-EXE-NEXT: Address: 0x41089C +GOT-EXE-NEXT: Access: -32740 +GOT-EXE-NEXT: Initial: 0x410840 +GOT-EXE-NEXT: } +GOT-EXE-NEXT: Entry { +GOT-EXE-NEXT: Address: 0x4108A0 +GOT-EXE-NEXT: Access: -32736 +GOT-EXE-NEXT: Initial: 0x0 +GOT-EXE-NEXT: } +GOT-EXE-NEXT: ] +GOT-EXE-NEXT: Global entries [ +GOT-EXE-NEXT: Entry { +GOT-EXE-NEXT: Address: 0x4108A4 +GOT-EXE-NEXT: Access: -32732 +GOT-EXE-NEXT: Initial: 0x0 +GOT-EXE-NEXT: Value: 0x0 +GOT-EXE-NEXT: Type: Function (0x2) +GOT-EXE-NEXT: Section: Undefined (0x0) +GOT-EXE-NEXT: Name: __gmon_start__@ (1) +GOT-EXE-NEXT: } +GOT-EXE-NEXT: ] +GOT-EXE-NEXT: Number of TLS and multi-GOT entries: 0 +GOT-EXE-NEXT: } + +GOT-SO: Primary GOT { +GOT-SO-NEXT: Canonical gp value: 0x188D0 +GOT-SO-NEXT: Reserved entries [ +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108E0 +GOT-SO-NEXT: Access: -32752 +GOT-SO-NEXT: Initial: 0x0 
+GOT-SO-NEXT: Purpose: Lazy resolver +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108E4 +GOT-SO-NEXT: Access: -32748 +GOT-SO-NEXT: Initial: 0x80000000 +GOT-SO-NEXT: Purpose: Module pointer (GNU extension) +GOT-SO-NEXT: } +GOT-SO-NEXT: ] +GOT-SO-NEXT: Local entries [ +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108E8 +GOT-SO-NEXT: Access: -32744 +GOT-SO-NEXT: Initial: 0x108E0 +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108EC +GOT-SO-NEXT: Access: -32740 +GOT-SO-NEXT: Initial: 0x10000 +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108F0 +GOT-SO-NEXT: Access: -32736 +GOT-SO-NEXT: Initial: 0x10920 +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108F4 +GOT-SO-NEXT: Access: -32732 +GOT-SO-NEXT: Initial: 0x108CC +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108F8 +GOT-SO-NEXT: Access: -32728 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x108FC +GOT-SO-NEXT: Access: -32724 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x10900 +GOT-SO-NEXT: Access: -32720 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x10904 +GOT-SO-NEXT: Access: -32716 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: } +GOT-SO-NEXT: ] +GOT-SO-NEXT: Global entries [ +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x10908 +GOT-SO-NEXT: Access: -32712 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: Value: 0x0 +GOT-SO-NEXT: Type: None (0x0) +GOT-SO-NEXT: Section: Undefined (0x0) +GOT-SO-NEXT: Name: _ITM_registerTMCloneTable@ (87) +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x1090C +GOT-SO-NEXT: Access: -32708 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: Value: 0x0 +GOT-SO-NEXT: Type: None (0x0) +GOT-SO-NEXT: Section: Undefined (0x0) +GOT-SO-NEXT: Name: _Jv_RegisterClasses@ (128) +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x10910 +GOT-SO-NEXT: Access: -32704 +GOT-SO-NEXT: Initial: 0x0 
+GOT-SO-NEXT: Value: 0x0 +GOT-SO-NEXT: Type: Function (0x2) +GOT-SO-NEXT: Section: Undefined (0x0) +GOT-SO-NEXT: Name: __gmon_start__@ (23) +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x10914 +GOT-SO-NEXT: Access: -32700 +GOT-SO-NEXT: Initial: 0x840 +GOT-SO-NEXT: Value: 0x840 +GOT-SO-NEXT: Type: Function (0x2) +GOT-SO-NEXT: Section: Undefined (0x0) +GOT-SO-NEXT: Name: puts@GLIBC_2.0 (162) +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x10918 +GOT-SO-NEXT: Access: -32696 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: Value: 0x0 +GOT-SO-NEXT: Type: None (0x0) +GOT-SO-NEXT: Section: Undefined (0x0) +GOT-SO-NEXT: Name: _ITM_deregisterTMCloneTable@ (59) +GOT-SO-NEXT: } +GOT-SO-NEXT: Entry { +GOT-SO-NEXT: Address: 0x1091C +GOT-SO-NEXT: Access: -32692 +GOT-SO-NEXT: Initial: 0x0 +GOT-SO-NEXT: Value: 0x0 +GOT-SO-NEXT: Type: Function (0x2) +GOT-SO-NEXT: Section: Undefined (0x0) +GOT-SO-NEXT: Name: __cxa_finalize@GLIBC_2.2 (113) +GOT-SO-NEXT: } +GOT-SO-NEXT: ] +GOT-SO-NEXT: Number of TLS and multi-GOT entries: 0 +GOT-SO-NEXT: } + +GOT-TLS: Primary GOT { +GOT-TLS-NEXT: Canonical gp value: 0x18BF0 +GOT-TLS-NEXT: Reserved entries [ +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C00 +GOT-TLS-NEXT: Access: -32752 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: Purpose: Lazy resolver +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C08 +GOT-TLS-NEXT: Access: -32744 +GOT-TLS-NEXT: Initial: 0x8000000000000000 +GOT-TLS-NEXT: Purpose: Module pointer (GNU extension) +GOT-TLS-NEXT: } +GOT-TLS-NEXT: ] +GOT-TLS-NEXT: Local entries [ +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C10 +GOT-TLS-NEXT: Access: -32736 +GOT-TLS-NEXT: Initial: 0x10000 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C18 +GOT-TLS-NEXT: Access: -32728 +GOT-TLS-NEXT: Initial: 0x10C00 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C20 +GOT-TLS-NEXT: Access: -32720 +GOT-TLS-NEXT: Initial: 0x10CB8 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: 
Entry { +GOT-TLS-NEXT: Address: 0x10C28 +GOT-TLS-NEXT: Access: -32712 +GOT-TLS-NEXT: Initial: 0x10BF0 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C30 +GOT-TLS-NEXT: Access: -32704 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C38 +GOT-TLS-NEXT: Access: -32696 +GOT-TLS-NEXT: Initial: 0x948 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C40 +GOT-TLS-NEXT: Access: -32688 +GOT-TLS-NEXT: Initial: 0xA20 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C48 +GOT-TLS-NEXT: Access: -32680 +GOT-TLS-NEXT: Initial: 0xAF0 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C50 +GOT-TLS-NEXT: Access: -32672 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C58 +GOT-TLS-NEXT: Access: -32664 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C60 +GOT-TLS-NEXT: Access: -32656 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: } +GOT-TLS-NEXT: ] +GOT-TLS-NEXT: Global entries [ +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C68 +GOT-TLS-NEXT: Access: -32648 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: Value: 0x0 +GOT-TLS-NEXT: Type: None (0x0) +GOT-TLS-NEXT: Section: Undefined (0x0) +GOT-TLS-NEXT: Name: _ITM_registerTMCloneTable@ (78) +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C70 +GOT-TLS-NEXT: Access: -32640 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: Value: 0x0 +GOT-TLS-NEXT: Type: None (0x0) +GOT-TLS-NEXT: Section: Undefined (0x0) +GOT-TLS-NEXT: Name: _Jv_RegisterClasses@ (119) +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C78 +GOT-TLS-NEXT: Access: -32632 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: Value: 0x0 +GOT-TLS-NEXT: Type: Function (0x2) +GOT-TLS-NEXT: Section: Undefined (0x0) +GOT-TLS-NEXT: Name: __gmon_start__@ (23) +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C80 +GOT-TLS-NEXT: Access: -32624 
+GOT-TLS-NEXT: Initial: 0xB60 +GOT-TLS-NEXT: Value: 0xB60 +GOT-TLS-NEXT: Type: Function (0x2) +GOT-TLS-NEXT: Section: Undefined (0x0) +GOT-TLS-NEXT: Name: __tls_get_addr@GLIBC_2.3 (150) +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C88 +GOT-TLS-NEXT: Access: -32616 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: Value: 0x0 +GOT-TLS-NEXT: Type: None (0x0) +GOT-TLS-NEXT: Section: Undefined (0x0) +GOT-TLS-NEXT: Name: _ITM_deregisterTMCloneTable@ (50) +GOT-TLS-NEXT: } +GOT-TLS-NEXT: Entry { +GOT-TLS-NEXT: Address: 0x10C90 +GOT-TLS-NEXT: Access: -32608 +GOT-TLS-NEXT: Initial: 0x0 +GOT-TLS-NEXT: Value: 0x0 +GOT-TLS-NEXT: Type: Function (0x2) +GOT-TLS-NEXT: Section: Undefined (0x0) +GOT-TLS-NEXT: Name: __cxa_finalize@GLIBC_2.2 (104) +GOT-TLS-NEXT: } +GOT-TLS-NEXT: ] +GOT-TLS-NEXT: Number of TLS and multi-GOT entries: 4 +GOT-TLS-NEXT: } diff --git a/tools/gold/CMakeLists.txt b/tools/gold/CMakeLists.txt index 07a1e2849b95..3864e154548c 100644 --- a/tools/gold/CMakeLists.txt +++ b/tools/gold/CMakeLists.txt @@ -14,13 +14,14 @@ else() # ABI compatibility. 
add_definitions( -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 ) - set(LLVM_LINK_COMPONENTS support) + set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + LTO + ) add_llvm_loadable_module(LLVMgold gold-plugin.cpp ) - target_link_libraries(LLVMgold ${cmake_2_8_12_PRIVATE} LTO) - endif() diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 371574f26156..3c2da94af532 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -15,11 +15,15 @@ #include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H #include "llvm-c/lto.h" #include "llvm/ADT/StringSet.h" +#include "llvm/CodeGen/CommandFlags.h" +#include "llvm/LTO/LTOCodeGenerator.h" +#include "llvm/LTO/LTOModule.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include #include @@ -46,36 +50,32 @@ using namespace llvm; namespace { - ld_plugin_status discard_message(int level, const char *format, ...) { - // Die loudly. Recent versions of Gold pass ld_plugin_message as the first - // callback in the transfer vector. This should never be called. 
- abort(); - } +struct claimed_file { + void *handle; + std::vector syms; +}; +} - ld_plugin_add_symbols add_symbols = NULL; - ld_plugin_get_symbols get_symbols = NULL; - ld_plugin_add_input_file add_input_file = NULL; - ld_plugin_add_input_library add_input_library = NULL; - ld_plugin_set_extra_library_path set_extra_library_path = NULL; - ld_plugin_get_view get_view = NULL; - ld_plugin_message message = discard_message; - - int api_version = 0; - int gold_version = 0; - - struct claimed_file { - void *handle; - std::vector syms; - }; - - lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; - std::string output_name = ""; - std::list Modules; - std::vector Cleanup; - lto_code_gen_t code_gen = NULL; - StringSet<> CannotBeHidden; +static ld_plugin_status discard_message(int level, const char *format, ...) { + // Die loudly. Recent versions of Gold pass ld_plugin_message as the first + // callback in the transfer vector. This should never be called. + abort(); } +static ld_plugin_add_symbols add_symbols = NULL; +static ld_plugin_get_symbols get_symbols = NULL; +static ld_plugin_add_input_file add_input_file = NULL; +static ld_plugin_set_extra_library_path set_extra_library_path = NULL; +static ld_plugin_get_view get_view = NULL; +static ld_plugin_message message = discard_message; +static lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; +static std::string output_name = ""; +static std::list Modules; +static std::vector Cleanup; +static LTOCodeGenerator *CodeGen = nullptr; +static StringSet<> CannotBeHidden; +static llvm::TargetOptions TargetOpts; + namespace options { enum generate_bc { BC_NO, BC_ALSO, BC_ONLY }; static bool generate_api_file = false; @@ -142,15 +142,10 @@ ld_plugin_status onload(ld_plugin_tv *tv) { // for services. 
bool registeredClaimFile = false; + bool RegisteredAllSymbolsRead = false; for (; tv->tv_tag != LDPT_NULL; ++tv) { switch (tv->tv_tag) { - case LDPT_API_VERSION: - api_version = tv->tv_u.tv_val; - break; - case LDPT_GOLD_VERSION: // major * 100 + minor - gold_version = tv->tv_u.tv_val; - break; case LDPT_OUTPUT_NAME: output_name = tv->tv_u.tv_string; break; @@ -169,8 +164,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { tv->tv_u.tv_val); return LDPS_ERR; } - // TODO: add an option to disable PIC. - //output_type = LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC; break; case LDPT_OPTION: options::process_plugin_option(tv->tv_u.tv_string); @@ -191,7 +184,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { if ((*callback)(all_symbols_read_hook) != LDPS_OK) return LDPS_ERR; - code_gen = lto_codegen_create(); + RegisteredAllSymbolsRead = true; } break; case LDPT_REGISTER_CLEANUP_HOOK: { ld_plugin_register_cleanup callback; @@ -209,9 +202,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { case LDPT_ADD_INPUT_FILE: add_input_file = tv->tv_u.tv_add_input_file; break; - case LDPT_ADD_INPUT_LIBRARY: - add_input_library = tv->tv_u.tv_add_input_file; - break; case LDPT_SET_EXTRA_LIBRARY_PATH: set_extra_library_path = tv->tv_u.tv_set_extra_library_path; break; @@ -235,6 +225,37 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } + if (!RegisteredAllSymbolsRead) + return LDPS_OK; + + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + CodeGen = new LTOCodeGenerator(); + if (MAttrs.size()) { + std::string Attrs; + for (unsigned I = 0; I < MAttrs.size(); ++I) { + if (I > 0) + Attrs.append(","); + Attrs.append(MAttrs[I]); + } + CodeGen->setAttr(Attrs.c_str()); + } + + // Pass through extra options to the code generator. 
+ if (!options::extra.empty()) { + for (std::vector::iterator it = options::extra.begin(); + it != options::extra.end(); ++it) { + CodeGen->setCodeGenDebugOptions((*it).c_str()); + } + } + + CodeGen->parseCodeGenDebugOptions(); + TargetOpts = InitTargetOptionsFromCodeGenFlags(); + CodeGen->setTargetOptions(TargetOpts); + return LDPS_OK; } @@ -243,7 +264,7 @@ ld_plugin_status onload(ld_plugin_tv *tv) { /// with add_symbol if possible. static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, int *claimed) { - lto_module_t M; + LTOModule *M; const void *view; std::unique_ptr buffer; if (get_view) { @@ -266,17 +287,15 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, view = buffer->getBufferStart(); } - if (!lto_module_is_object_file_in_memory(view, file->filesize)) + if (!LTOModule::isBitcodeFile(view, file->filesize)) return LDPS_OK; - M = lto_module_create_from_memory(view, file->filesize); + std::string Error; + M = LTOModule::makeLTOModule(view, file->filesize, TargetOpts, Error); if (!M) { - if (const char* msg = lto_get_error_message()) { - (*message)(LDPL_ERROR, - "LLVM gold plugin has failed to create LTO module: %s", - msg); - return LDPS_ERR; - } + (*message)(LDPL_ERROR, + "LLVM gold plugin has failed to create LTO module: %s", + Error.c_str()); return LDPS_OK; } @@ -285,21 +304,20 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, claimed_file &cf = Modules.back(); if (!options::triple.empty()) - lto_module_set_target_triple(M, options::triple.c_str()); + M->setTargetTriple(options::triple.c_str()); cf.handle = file->handle; - unsigned sym_count = lto_module_get_num_symbols(M); + unsigned sym_count = M->getSymbolCount(); cf.syms.reserve(sym_count); for (unsigned i = 0; i != sym_count; ++i) { - lto_symbol_attributes attrs = lto_module_get_symbol_attribute(M, i); + lto_symbol_attributes attrs = M->getSymbolAttributes(i); if ((attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) 
continue; cf.syms.push_back(ld_plugin_symbol()); ld_plugin_symbol &sym = cf.syms.back(); - sym.name = const_cast(lto_module_get_symbol_name(M, i)); - sym.name = strdup(sym.name); + sym.name = strdup(M->getSymbolName(i)); sym.version = NULL; int scope = attrs & LTO_SYMBOL_SCOPE_MASK; @@ -361,15 +379,15 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, } } - if (code_gen) { - if (lto_codegen_add_module(code_gen, M)) { - (*message)(LDPL_ERROR, "Error linking module: %s", - lto_get_error_message()); + if (CodeGen) { + std::string Error; + if (!CodeGen->addModule(M, Error)) { + (*message)(LDPL_ERROR, "Error linking module: %s", Error.c_str()); return LDPS_ERR; } } - lto_module_dispose(M); + delete M; return LDPS_OK; } @@ -388,7 +406,7 @@ static bool mustPreserve(const claimed_file &F, int i) { /// codegen. static ld_plugin_status all_symbols_read_hook(void) { std::ofstream api_file; - assert(code_gen); + assert(CodeGen); if (options::generate_api_file) { api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc); @@ -405,7 +423,7 @@ static ld_plugin_status all_symbols_read_hook(void) { (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]); for (unsigned i = 0, e = I->syms.size(); i != e; i++) { if (mustPreserve(*I, i)) { - lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name); + CodeGen->addMustPreserveSymbol(I->syms[i].name); if (options::generate_api_file) api_file << I->syms[i].name << "\n"; @@ -416,18 +434,10 @@ static ld_plugin_status all_symbols_read_hook(void) { if (options::generate_api_file) api_file.close(); - lto_codegen_set_pic_model(code_gen, output_type); - lto_codegen_set_debug_model(code_gen, LTO_DEBUG_MODEL_DWARF); + CodeGen->setCodePICModel(output_type); + CodeGen->setDebugInfo(LTO_DEBUG_MODEL_DWARF); if (!options::mcpu.empty()) - lto_codegen_set_cpu(code_gen, options::mcpu.c_str()); - - // Pass through extra options to the code generator. 
- if (!options::extra.empty()) { - for (std::vector::iterator it = options::extra.begin(); - it != options::extra.end(); ++it) { - lto_codegen_debug_options(code_gen, (*it).c_str()); - } - } + CodeGen->setCpu(options::mcpu.c_str()); if (options::generate_bc_file != options::BC_NO) { std::string path; @@ -437,11 +447,11 @@ static ld_plugin_status all_symbols_read_hook(void) { path = options::bc_path; else path = output_name + ".bc"; - bool err = lto_codegen_write_merged_modules(code_gen, path.c_str()); - if (err) + std::string Error; + if (!CodeGen->writeMergedModules(path.c_str(), Error)) (*message)(LDPL_FATAL, "Failed to write the output file."); if (options::generate_bc_file == options::BC_ONLY) { - lto_codegen_dispose(code_gen); + delete CodeGen; exit(0); } } @@ -449,13 +459,14 @@ static ld_plugin_status all_symbols_read_hook(void) { std::string ObjPath; { const char *Temp; - if (lto_codegen_compile_to_file(code_gen, &Temp)) { + std::string Error; + if (!CodeGen->compile_to_file(&Temp, /*DisableOpt*/ false, /*DisableInline*/ + false, /*DisableGVNLoadPRE*/ false, Error)) (*message)(LDPL_ERROR, "Could not produce a combined object file\n"); - } ObjPath = Temp; } - lto_codegen_dispose(code_gen); + delete CodeGen; for (std::list::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { for (unsigned i = 0; i != I->syms.size(); ++i) { diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 196240a0c2d4..80a977c30bc8 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -692,7 +692,6 @@ static void writeSymbolTable( std::string NameBuf; raw_string_ostream NameOS(NameBuf); unsigned NumSyms = 0; - std::vector DeleteIt; LLVMContext &Context = getGlobalContext(); for (ArrayRef::iterator I = Members.begin(), E = Members.end(); @@ -703,26 +702,23 @@ static void writeSymbolTable( MemberBuffer, false, sys::fs::file_magic::unknown, &Context); if (!ObjOrErr) continue; // FIXME: check only for "not an object file" errors. 
- object::SymbolicFile *Obj = ObjOrErr.get(); + std::unique_ptr Obj(ObjOrErr.get()); - DeleteIt.push_back(Obj); if (!StartOffset) { printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0); StartOffset = Out.tell(); print32BE(Out, 0); } - for (object::basic_symbol_iterator I = Obj->symbol_begin(), - E = Obj->symbol_end(); - I != E; ++I) { - uint32_t Symflags = I->getFlags(); + for (const object::BasicSymbolRef &S : Obj->symbols()) { + uint32_t Symflags = S.getFlags(); if (Symflags & object::SymbolRef::SF_FormatSpecific) continue; if (!(Symflags & object::SymbolRef::SF_Global)) continue; if (Symflags & object::SymbolRef::SF_Undefined) continue; - failIfError(I->printName(NameOS)); + failIfError(S.printName(NameOS)); NameOS << '\0'; ++NumSyms; MemberOffsetRefs.push_back(std::make_pair(Out.tell(), MemberNum)); @@ -731,13 +727,6 @@ static void writeSymbolTable( } Out << NameOS.str(); - for (std::vector::iterator I = DeleteIt.begin(), - E = DeleteIt.end(); - I != E; ++I) { - object::SymbolicFile *O = *I; - delete O; - } - if (StartOffset == 0) return; diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index a6b74a7b1baa..31b3d295b41f 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -52,7 +52,8 @@ static cl::opt ShowEncoding("show-encoding", cl::desc("Show instruction encodings")); static cl::opt -CompressDebugSections("compress-debug-sections", cl::desc("Compress DWARF debug sections")); +CompressDebugSections("compress-debug-sections", + cl::desc("Compress DWARF debug sections")); static cl::opt ShowInst("show-inst", cl::desc("Show internal instruction representation")); @@ -149,9 +150,6 @@ static cl::opt GenDwarfForAssembly("g", cl::desc("Generate dwarf debugging info for assembly " "source files")); -static cl::opt -DwarfVersion("dwarf-version", cl::desc("Dwarf version"), cl::init(4)); - static cl::opt DebugCompilationDir("fdebug-compilation-dir", cl::desc("Specifies the debug info's compilation dir")); @@ -240,7 +238,8 
@@ static void setDwarfDebugProducer(void) { DwarfDebugProducer += getenv("DEBUG_PRODUCER"); } -static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out) { +static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, + tool_output_file *Out) { AsmLexer Lexer(MAI); Lexer.setBuffer(SrcMgr.getMemoryBuffer(0)); @@ -320,12 +319,13 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out) static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, - MCAsmInfo &MAI, MCSubtargetInfo &STI, MCInstrInfo &MCII) { + MCAsmInfo &MAI, MCSubtargetInfo &STI, + MCInstrInfo &MCII, MCTargetOptions &MCOptions) { std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( - TheTarget->createMCAsmParser(STI, *Parser, MCII, - InitMCTargetOptionsFromFlags())); + TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); + if (!TAP) { errs() << ProgName << ": error: this target does not support assembly parsing.\n"; @@ -356,6 +356,7 @@ int main(int argc, char **argv) { cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); + MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); TripleName = Triple::normalize(TripleName); setDwarfDebugFlags(argc, argv); @@ -391,7 +392,8 @@ int main(int argc, char **argv) { if (CompressDebugSections) { if (!zlib::isAvailable()) { - errs() << ProgName << ": build tools with zlib to enable -compress-debug-sections"; + errs() << ProgName + << ": build tools with zlib to enable -compress-debug-sections"; return 1; } MAI->setCompressDebugSections(true); @@ -407,6 +409,8 @@ int main(int argc, char **argv) { Ctx.setAllowTemporaryLabels(false); Ctx.setGenDwarfForAssembly(GenDwarfForAssembly); + // Default to 4 for dwarf version. + unsigned DwarfVersion = MCOptions.DwarfVersion ? 
MCOptions.DwarfVersion : 4; if (DwarfVersion < 2 || DwarfVersion > 4) { errs() << ProgName << ": Dwarf version " << DwarfVersion << " is not supported." << '\n'; @@ -479,7 +483,8 @@ int main(int argc, char **argv) { Res = AsLexInput(SrcMgr, *MAI, Out.get()); break; case AC_Assemble: - Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, *MCII); + Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI, + *MCII, MCOptions); break; case AC_MDisassemble: assert(IP && "Expected assembly output"); diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index e605e6e430a7..cccddb0736d1 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -81,6 +81,7 @@ cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"), cl::opt BSDFormat("B", cl::desc("Alias for --format=bsd")); cl::opt POSIXFormat("P", cl::desc("Alias for --format=posix")); +cl::opt DarwinFormat("m", cl::desc("Alias for --format=darwin")); cl::opt PrintFileName( "print-file-name", @@ -577,6 +578,10 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) { StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref); if (SegmentName == "__TEXT" && SectionName == "__text") return 't'; + else if (SegmentName == "__DATA" && SectionName == "__data") + return 'd'; + else if (SegmentName == "__DATA" && SectionName == "__bss") + return 'b'; else return 's'; } @@ -752,20 +757,28 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - outs() << O->getFileName() << ":\n"; + if (isa(O)) { + outs() << Filename << "(" << O->getFileName() << ")"; + } else + outs() << O->getFileName(); + outs() << ":\n"; dumpSymbolNamesFromObject(O); } } return; } if (MachOUniversalBinary *UB = dyn_cast(Bin.get())) { + bool moreThanOneArch = UB->getNumberOfObjects() > 1; for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), E = 
UB->end_objects(); I != E; ++I) { std::unique_ptr Obj; std::unique_ptr A; if (!I->getAsObjectFile(Obj)) { - outs() << Obj->getFileName() << ":\n"; + outs() << Obj->getFileName(); + if (isa(Obj.get()) && moreThanOneArch) + outs() << " (for architecture " << I->getArchTypeName() << ")"; + outs() << ":\n"; dumpSymbolNamesFromObject(Obj.get()); } else if (!I->getAsArchive(A)) { @@ -776,8 +789,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) { if (ChildOrErr.getError()) continue; if (SymbolicFile *O = dyn_cast(&*ChildOrErr.get())) { - outs() << A->getFileName() << ":"; - outs() << O->getFileName() << ":\n"; + outs() << A->getFileName(); + if (isa(O)) { + outs() << "(" << O->getFileName() << ")"; + if (moreThanOneArch) + outs() << " (for architecture " << I->getArchTypeName() << ")"; + } else + outs() << ":" << O->getFileName(); + outs() << ":\n"; dumpSymbolNamesFromObject(O); } } @@ -810,6 +829,8 @@ int main(int argc, char **argv) { OutputFormat = bsd; if (POSIXFormat) OutputFormat = posix; + if (DarwinFormat) + OutputFormat = darwin; // The relative order of these is important. If you pass --size-sort it should // only print out the size. 
However, if you pass -S --size-sort, it should diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index f13b300d6da4..502c7197ddb2 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -18,6 +18,7 @@ #include "Error.h" #include "ObjDumper.h" #include "StreamWriter.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Object/ELFObjectFile.h" @@ -54,6 +55,7 @@ class ELFDumper : public ObjDumper { void printProgramHeaders() override; void printAttributes() override; + void printMipsPLTGOT() override; private: typedef ELFFile ELFO; @@ -159,6 +161,15 @@ getSectionNameIndex(const ELFO &Obj, typename ELFO::Elf_Sym_Iter Symbol, } } +template +static const typename ELFFile::Elf_Shdr * +findSectionByAddress(const ELFFile *Obj, uint64_t Addr) { + for (const auto &Shdr : Obj->sections()) + if (Shdr.sh_addr == Addr) + return &Shdr; + return nullptr; +} + static const EnumEntry ElfClass[] = { { "None", ELF::ELFCLASSNONE }, { "32-bit", ELF::ELFCLASS32 }, @@ -1021,3 +1032,209 @@ void ELFDumper >::printAttributes() { } } +namespace { +template class MipsGOTParser { +public: + typedef object::ELFFile ObjectFile; + typedef typename ObjectFile::Elf_Shdr Elf_Shdr; + + MipsGOTParser(const ObjectFile *Obj, StreamWriter &W) : Obj(Obj), W(W) {} + + void parseGOT(const Elf_Shdr &GOTShdr); + +private: + typedef typename ObjectFile::Elf_Sym_Iter Elf_Sym_Iter; + typedef typename ObjectFile::Elf_Addr GOTEntry; + typedef typename ObjectFile::template ELFEntityIterator + GOTIter; + + const ObjectFile *Obj; + StreamWriter &W; + + std::size_t getGOTTotal(ArrayRef GOT) const; + GOTIter makeGOTIter(ArrayRef GOT, std::size_t EntryNum); + + bool getGOTTags(uint64_t &LocalGotNum, uint64_t &GotSym); + void printGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It); + void printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It, + Elf_Sym_Iter Sym); +}; +} + 
+template +void MipsGOTParser::parseGOT(const Elf_Shdr &GOTShdr) { + // See "Global Offset Table" in Chapter 5 in the following document + // for detailed GOT description. + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + + ErrorOr> GOT = Obj->getSectionContents(&GOTShdr); + if (!GOT) { + W.startLine() << "The .got section is empty.\n"; + return; + } + + uint64_t DtLocalGotNum; + uint64_t DtGotSym; + if (!getGOTTags(DtLocalGotNum, DtGotSym)) + return; + + if (DtLocalGotNum > getGOTTotal(*GOT)) { + W.startLine() << "MIPS_LOCAL_GOTNO exceeds a number of GOT entries.\n"; + return; + } + + Elf_Sym_Iter DynSymBegin = Obj->begin_dynamic_symbols(); + Elf_Sym_Iter DynSymEnd = Obj->end_dynamic_symbols(); + std::size_t DynSymTotal = std::size_t(std::distance(DynSymBegin, DynSymEnd)); + + if (DtGotSym + 1 > DynSymTotal) { + W.startLine() << "MIPS_GOTSYM exceeds a number of dynamic symbols.\n"; + return; + } + + std::size_t GlobalGotNum = DynSymTotal - DtGotSym; + + if (DtLocalGotNum + GlobalGotNum > getGOTTotal(*GOT)) { + W.startLine() << "Number of global GOT entries exceeds the size of GOT.\n"; + return; + } + + GOTIter GotBegin = makeGOTIter(*GOT, 0); + GOTIter GotLocalEnd = makeGOTIter(*GOT, DtLocalGotNum); + GOTIter It = GotBegin; + + DictScope GS(W, "Primary GOT"); + + W.printHex("Canonical gp value", GOTShdr.sh_addr + 0x7ff0); + { + ListScope RS(W, "Reserved entries"); + + { + DictScope D(W, "Entry"); + printGotEntry(GOTShdr.sh_addr, GotBegin, It++); + W.printString("Purpose", StringRef("Lazy resolver")); + } + + if (It != GotLocalEnd && (*It >> (sizeof(GOTEntry) * 8 - 1)) != 0) { + DictScope D(W, "Entry"); + printGotEntry(GOTShdr.sh_addr, GotBegin, It++); + W.printString("Purpose", StringRef("Module pointer (GNU extension)")); + } + } + { + ListScope LS(W, "Local entries"); + for (; It != GotLocalEnd; ++It) { + DictScope D(W, "Entry"); + printGotEntry(GOTShdr.sh_addr, GotBegin, It); + } + } + { + ListScope GS(W, "Global entries"); + + GOTIter 
GotGlobalEnd = makeGOTIter(*GOT, DtLocalGotNum + GlobalGotNum); + Elf_Sym_Iter GotDynSym = DynSymBegin + DtGotSym; + for (; It != GotGlobalEnd; ++It) { + DictScope D(W, "Entry"); + printGlobalGotEntry(GOTShdr.sh_addr, GotBegin, It, GotDynSym++); + } + } + + std::size_t SpecGotNum = getGOTTotal(*GOT) - DtLocalGotNum - GlobalGotNum; + W.printNumber("Number of TLS and multi-GOT entries", uint64_t(SpecGotNum)); +} + +template +std::size_t MipsGOTParser::getGOTTotal(ArrayRef GOT) const { + return GOT.size() / sizeof(GOTEntry); +} + +template +typename MipsGOTParser::GOTIter +MipsGOTParser::makeGOTIter(ArrayRef GOT, std::size_t EntryNum) { + const char *Data = reinterpret_cast(GOT.data()); + return GOTIter(sizeof(GOTEntry), Data + EntryNum * sizeof(GOTEntry)); +} + +template +bool MipsGOTParser::getGOTTags(uint64_t &LocalGotNum, uint64_t &GotSym) { + bool FoundLocalGotNum = false; + bool FoundGotSym = false; + for (const auto &Entry : Obj->dynamic_table()) { + switch (Entry.getTag()) { + case ELF::DT_MIPS_LOCAL_GOTNO: + LocalGotNum = Entry.getVal(); + FoundLocalGotNum = true; + break; + case ELF::DT_MIPS_GOTSYM: + GotSym = Entry.getVal(); + FoundGotSym = true; + break; + } + } + + if (!FoundLocalGotNum) { + W.startLine() << "Cannot find MIPS_LOCAL_GOTNO dynamic table tag.\n"; + return false; + } + + if (!FoundGotSym) { + W.startLine() << "Cannot find MIPS_GOTSYM dynamic table tag.\n"; + return false; + } + + return true; +} + +template +void MipsGOTParser::printGotEntry(uint64_t GotAddr, GOTIter BeginIt, + GOTIter It) { + int64_t Offset = std::distance(BeginIt, It) * sizeof(GOTEntry); + W.printHex("Address", GotAddr + Offset); + W.printNumber("Access", Offset - 0x7ff0); + W.printHex("Initial", *It); +} + +template +void MipsGOTParser::printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt, + GOTIter It, Elf_Sym_Iter Sym) { + printGotEntry(GotAddr, BeginIt, It); + + W.printHex("Value", Sym->st_value); + W.printEnum("Type", Sym->getType(), makeArrayRef(ElfSymbolTypes)); + + 
unsigned SectionIndex = 0; + StringRef SectionName; + getSectionNameIndex(*Obj, Sym, SectionName, SectionIndex); + W.printHex("Section", SectionName, SectionIndex); + + std::string FullSymbolName = getFullSymbolName(*Obj, Sym); + W.printNumber("Name", FullSymbolName, Sym->st_name); +} + +template void ELFDumper::printMipsPLTGOT() { + if (Obj->getHeader()->e_machine != EM_MIPS) { + W.startLine() << "MIPS PLT GOT is available for MIPS targets only.\n"; + return; + } + + llvm::Optional DtPltGot; + for (const auto &Entry : Obj->dynamic_table()) { + if (Entry.getTag() == ELF::DT_PLTGOT) { + DtPltGot = Entry.getVal(); + break; + } + } + + if (!DtPltGot) { + W.startLine() << "Cannot find PLTGOT dynamic table tag.\n"; + return; + } + + const Elf_Shdr *GotShdr = findSectionByAddress(Obj, *DtPltGot); + if (!GotShdr) { + W.startLine() << "There is no .got section in the file.\n"; + return; + } + + MipsGOTParser(Obj, W).parseGOT(*GotShdr); +} diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h index 795193327bc5..f80a28b25cfc 100644 --- a/tools/llvm-readobj/ObjDumper.h +++ b/tools/llvm-readobj/ObjDumper.h @@ -40,6 +40,9 @@ class ObjDumper { // Only implemented for ARM ELF at this time. virtual void printAttributes() { } + // Only implemented for MIPS ELF at this time. 
+ virtual void printMipsPLTGOT() { } + protected: StreamWriter& W; }; diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 52db0ebbef3e..8d2a997a2312 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -135,6 +135,11 @@ namespace opts { cl::desc("Display the ARM attributes section")); cl::alias ARMAttributesShort("-a", cl::desc("Alias for --arm-attributes"), cl::aliasopt(ARMAttributes)); + + // -mips-plt-got + cl::opt + MipsPLTGOT("mips-plt-got", + cl::desc("Display the MIPS GOT and PLT GOT sections")); } // namespace opts static int ReturnValue = EXIT_SUCCESS; @@ -177,6 +182,18 @@ static void reportError(StringRef Input, StringRef Message) { ReturnValue = EXIT_FAILURE; } +static bool isMipsArch(unsigned Arch) { + switch (Arch) { + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + return true; + default: + return false; + } +} + /// @brief Creates an format-specific object file dumper. 
static std::error_code createDumper(const ObjectFile *Obj, StreamWriter &Writer, std::unique_ptr &Result) { @@ -234,6 +251,9 @@ static void dumpObject(const ObjectFile *Obj) { if (Obj->getArch() == llvm::Triple::arm && Obj->isELF()) if (opts::ARMAttributes) Dumper->printAttributes(); + if (isMipsArch(Obj->getArch()) && Obj->isELF()) + if (opts::MipsPLTGOT) + Dumper->printMipsPLTGOT(); } diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h index 033195c1d49b..04139480a6d8 100644 --- a/tools/llvm-readobj/llvm-readobj.h +++ b/tools/llvm-readobj/llvm-readobj.h @@ -38,6 +38,7 @@ namespace opts { extern llvm::cl::opt ExpandRelocs; extern llvm::cl::opt CodeViewLineTables; extern llvm::cl::opt ARMAttributes; + extern llvm::cl::opt MipsPLTGOT; } // namespace opts #define LLVM_READOBJ_ENUM_ENT(ns, enum) \ diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp index ebd41a528023..b71380dcaa4a 100644 --- a/tools/llvm-size/llvm-size.cpp +++ b/tools/llvm-size/llvm-size.cpp @@ -16,6 +16,8 @@ #include "llvm/ADT/APInt.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -31,22 +33,30 @@ using namespace llvm; using namespace object; -enum OutputFormatTy {berkeley, sysv}; +enum OutputFormatTy {berkeley, sysv, darwin}; static cl::opt OutputFormat("format", cl::desc("Specify output format"), cl::values(clEnumVal(sysv, "System V format"), clEnumVal(berkeley, "Berkeley format"), - clEnumValEnd), + clEnumVal(darwin, "Darwin -m format"), clEnumValEnd), cl::init(berkeley)); static cl::opt OutputFormatShort(cl::desc("Specify output format"), cl::values(clEnumValN(sysv, "A", "System V format"), clEnumValN(berkeley, "B", "Berkeley format"), + clEnumValN(darwin, "m", "Darwin -m format"), clEnumValEnd), cl::init(berkeley)); +static bool 
berkeleyHeaderPrinted = false; +static bool moreThanOneFile = false; + +cl::opt DarwinLongFormat("l", + cl::desc("When format is darwin, use long format " + "to include addresses and offsets.")); + enum RadixTy {octal = 8, decimal = 10, hexadecimal = 16}; static cl::opt Radix("-radix", @@ -85,6 +95,182 @@ static size_t getNumLengthAsString(uint64_t num) { return result.size(); } +/// @brief Return the the printing format for the Radix. +static const char * getRadixFmt(void) { + switch (Radix) { + case octal: + return PRIo64; + case decimal: + return PRIu64; + case hexadecimal: + return PRIx64; + } + return nullptr; +} + +/// @brief Print the size of each Mach-O segment and section in @p MachO. +/// +/// This is when used when @c OutputFormat is darwin and produces the same +/// output as darwin's size(1) -m output. +static void PrintDarwinSectionSizes(MachOObjectFile *MachO) { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + const char *radix_fmt = getRadixFmt(); + if (Radix == hexadecimal) + fmt << "0x"; + fmt << "%" << radix_fmt; + + uint32_t LoadCommandCount = MachO->getHeader().ncmds; + uint32_t Filetype = MachO->getHeader().filetype; + MachOObjectFile::LoadCommandInfo Load = MachO->getFirstLoadCommandInfo(); + + uint64_t total = 0; + for (unsigned I = 0; ; ++I) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load); + outs() << "Segment " << Seg.segname << ": " + << format(fmt.str().c_str(), Seg.vmsize); + if (DarwinLongFormat) + outs() << " (vmaddr 0x" << format("%" PRIx64, Seg.vmaddr) + << " fileoff " << Seg.fileoff << ")"; + outs() << "\n"; + total += Seg.vmsize; + uint64_t sec_total = 0; + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = MachO->getSection64(Load, J); + if (Filetype == MachO::MH_OBJECT) + outs() << "\tSection (" << format("%.16s", &Sec.segname) << ", " + << format("%.16s", &Sec.sectname) << "): "; + else + outs() << "\tSection " << format("%.16s", 
&Sec.sectname) << ": "; + outs() << format(fmt.str().c_str(), Sec.size); + if (DarwinLongFormat) + outs() << " (addr 0x" << format("%" PRIx64, Sec.addr) + << " offset " << Sec.offset << ")"; + outs() << "\n"; + sec_total += Sec.size; + } + if (Seg.nsects != 0) + outs() << "\ttotal " << format(fmt.str().c_str(), sec_total) << "\n"; + } + else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = MachO->getSegmentLoadCommand(Load); + outs() << "Segment " << Seg.segname << ": " + << format(fmt.str().c_str(), Seg.vmsize); + if (DarwinLongFormat) + outs() << " (vmaddr 0x" << format("%" PRIx64, Seg.vmaddr) + << " fileoff " << Seg.fileoff << ")"; + outs() << "\n"; + total += Seg.vmsize; + uint64_t sec_total = 0; + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = MachO->getSection(Load, J); + if (Filetype == MachO::MH_OBJECT) + outs() << "\tSection (" << format("%.16s", &Sec.segname) << ", " + << format("%.16s", &Sec.sectname) << "): "; + else + outs() << "\tSection " << format("%.16s", &Sec.sectname) << ": "; + outs() << format(fmt.str().c_str(), Sec.size); + if (DarwinLongFormat) + outs() << " (addr 0x" << format("%" PRIx64, Sec.addr) + << " offset " << Sec.offset << ")"; + outs() << "\n"; + sec_total += Sec.size; + } + if (Seg.nsects != 0) + outs() << "\ttotal " << format(fmt.str().c_str(), sec_total) << "\n"; + } + if (I == LoadCommandCount - 1) + break; + else + Load = MachO->getNextLoadCommandInfo(Load); + } + outs() << "total " << format(fmt.str().c_str(), total) << "\n"; +} + +/// @brief Print the summary sizes of the standard Mach-O segments in @p MachO. +/// +/// This is when used when @c OutputFormat is berkeley with a Mach-O file and +/// produces the same output as darwin's size(1) default output. 
+static void PrintDarwinSegmentSizes(MachOObjectFile *MachO) { + uint32_t LoadCommandCount = MachO->getHeader().ncmds; + MachOObjectFile::LoadCommandInfo Load = MachO->getFirstLoadCommandInfo(); + + uint64_t total_text = 0; + uint64_t total_data = 0; + uint64_t total_objc = 0; + uint64_t total_others = 0; + for (unsigned I = 0; ; ++I) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Load); + if (MachO->getHeader().filetype == MachO::MH_OBJECT) { + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = MachO->getSection64(Load, J); + StringRef SegmentName = StringRef(Sec.segname); + if (SegmentName == "__TEXT") + total_text += Sec.size; + else if (SegmentName == "__DATA") + total_data += Sec.size; + else if (SegmentName == "__OBJC") + total_objc += Sec.size; + else + total_others += Sec.size; + } + } else { + StringRef SegmentName = StringRef(Seg.segname); + if (SegmentName == "__TEXT") + total_text += Seg.vmsize; + else if (SegmentName == "__DATA") + total_data += Seg.vmsize; + else if (SegmentName == "__OBJC") + total_objc += Seg.vmsize; + else + total_others += Seg.vmsize; + } + } + else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = MachO->getSegmentLoadCommand(Load); + if (MachO->getHeader().filetype == MachO::MH_OBJECT) { + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = MachO->getSection(Load, J); + StringRef SegmentName = StringRef(Sec.segname); + if (SegmentName == "__TEXT") + total_text += Sec.size; + else if (SegmentName == "__DATA") + total_data += Sec.size; + else if (SegmentName == "__OBJC") + total_objc += Sec.size; + else + total_others += Sec.size; + } + } else { + StringRef SegmentName = StringRef(Seg.segname); + if (SegmentName == "__TEXT") + total_text += Seg.vmsize; + else if (SegmentName == "__DATA") + total_data += Seg.vmsize; + else if (SegmentName == "__OBJC") + total_objc += Seg.vmsize; + else + total_others += Seg.vmsize; 
+ } + } + if (I == LoadCommandCount - 1) + break; + else + Load = MachO->getNextLoadCommandInfo(Load); + } + uint64_t total = total_text + total_data + total_objc + total_others; + + if (!berkeleyHeaderPrinted) { + outs() << "__TEXT\t__DATA\t__OBJC\tothers\tdec\thex\n"; + berkeleyHeaderPrinted = true; + } + outs() << total_text << "\t" << total_data << "\t" << total_objc << "\t" + << total_others << "\t" << total << "\t" << format("%" PRIx64, total) + << "\t"; +} + /// @brief Print the size of each section in @p Obj. /// /// The format used is determined by @c OutputFormat and @c Radix. @@ -92,20 +278,19 @@ static void PrintObjectSectionSizes(ObjectFile *Obj) { uint64_t total = 0; std::string fmtbuf; raw_string_ostream fmt(fmtbuf); - - const char *radix_fmt = nullptr; - switch (Radix) { - case octal: - radix_fmt = PRIo64; - break; - case decimal: - radix_fmt = PRIu64; - break; - case hexadecimal: - radix_fmt = PRIx64; - break; - } - if (OutputFormat == sysv) { + const char *radix_fmt = getRadixFmt(); + + // If OutputFormat is darwin and we have a MachOObjectFile print as darwin's + // size(1) -m output, else if OutputFormat is darwin and not a Mach-O object + // let it fall through to OutputFormat berkeley. + MachOObjectFile *MachO = dyn_cast(Obj); + if (OutputFormat == darwin && MachO) + PrintDarwinSectionSizes(MachO); + // If we have a MachOObjectFile and the OutputFormat is berkeley print as + // darwin's default berkeley format for Mach-O files. + else if (MachO && OutputFormat == berkeley) + PrintDarwinSegmentSizes(MachO); + else if (OutputFormat == sysv) { // Run two passes over all sections. The first gets the lengths needed for // formatting the output. The second actually does the output. std::size_t max_name_len = strlen("section"); @@ -204,6 +389,13 @@ static void PrintObjectSectionSizes(ObjectFile *Obj) { total = total_text + total_data + total_bss; + if (!berkeleyHeaderPrinted) { + outs() << " text data bss " + << (Radix == octal ? 
"oct" : "dec") + << " hex filename\n"; + berkeleyHeaderPrinted = true; + } + // Print result. fmt << "%#7" << radix_fmt << " " << "%#7" << radix_fmt << " " @@ -251,20 +443,93 @@ static void PrintFileSectionSizes(StringRef file) { continue; } if (ObjectFile *o = dyn_cast(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast(o); if (OutputFormat == sysv) outs() << o->getFileName() << " (ex " << a->getFileName() << "):\n"; + else if(MachO && OutputFormat == darwin) + outs() << a->getFileName() << "(" << o->getFileName() << "):\n"; PrintObjectSectionSizes(o); - if (OutputFormat == berkeley) - outs() << o->getFileName() << " (ex " << a->getFileName() << ")\n"; + if (OutputFormat == berkeley) { + if (MachO) + outs() << a->getFileName() << "(" << o->getFileName() << ")\n"; + else + outs() << o->getFileName() << " (ex " << a->getFileName() << ")\n"; + } + } + } + } else if (MachOUniversalBinary *UB = + dyn_cast(binary.get())) { + // This is a Mach-O universal binary. Iterate over each object and + // display its sizes. + bool moreThanOneArch = UB->getNumberOfObjects() > 1; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + std::unique_ptr UO; + std::unique_ptr UA; + if (!I->getAsObjectFile(UO)) { + if (ObjectFile *o = dyn_cast(&*UO.get())) { + MachOObjectFile *MachO = dyn_cast(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " :\n"; + else if(MachO && OutputFormat == darwin) { + if (moreThanOneFile || moreThanOneArch) + outs() << o->getFileName() << " (for architecture " + << I->getArchTypeName() << "):"; + outs() << "\n"; + } + PrintObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (!MachO || moreThanOneFile || moreThanOneArch) + outs() << o->getFileName() << " (for architecture " + << I->getArchTypeName() << ")"; + outs() << "\n"; + } + } + } + else if (!I->getAsArchive(UA)) { + // This is an archive. Iterate over each member and display its sizes. 
+ for (object::Archive::child_iterator i = UA->child_begin(), + e = UA->child_end(); i != e; ++i) { + ErrorOr> ChildOrErr = i->getAsBinary(); + if (std::error_code EC = ChildOrErr.getError()) { + errs() << ToolName << ": " << file << ": " << EC.message() << ".\n"; + continue; + } + if (ObjectFile *o = dyn_cast(&*ChildOrErr.get())) { + MachOObjectFile *MachO = dyn_cast(o); + if (OutputFormat == sysv) + outs() << o->getFileName() << " (ex " << UA->getFileName() + << "):\n"; + else if(MachO && OutputFormat == darwin) + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchTypeName() + << "):\n"; + PrintObjectSectionSizes(o); + if (OutputFormat == berkeley) { + if (MachO) + outs() << UA->getFileName() << "(" << o->getFileName() << ")" + << " (for architecture " << I->getArchTypeName() + << ")\n"; + else + outs() << o->getFileName() << " (ex " << UA->getFileName() + << ")\n"; + } + } + } } } } else if (ObjectFile *o = dyn_cast(binary.get())) { if (OutputFormat == sysv) outs() << o->getFileName() << " :\n"; PrintObjectSectionSizes(o); - if (OutputFormat == berkeley) - outs() << o->getFileName() << "\n"; + if (OutputFormat == berkeley) { + MachOObjectFile *MachO = dyn_cast(o); + if (!MachO || moreThanOneFile) + outs() << o->getFileName(); + outs() << "\n"; + } } else { errs() << ToolName << ": " << file << ": " << "Unrecognized file type.\n"; } @@ -290,11 +555,7 @@ int main(int argc, char **argv) { if (InputFilenames.size() == 0) InputFilenames.push_back("a.out"); - if (OutputFormat == berkeley) - outs() << " text data bss " - << (Radix == octal ? 
"oct" : "dec") - << " hex filename\n"; - + moreThanOneFile = InputFilenames.size() > 1; std::for_each(InputFilenames.begin(), InputFilenames.end(), PrintFileSectionSizes); diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 64abf5cd0582..d53d72a52293 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -13,11 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm-c/lto.h" -#include "llvm-c/Core.h" -#include "llvm-c/Target.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/LTO/LTOCodeGenerator.h" #include "llvm/LTO/LTOModule.h" +#include "llvm/Support/TargetSelect.h" // extra command-line flags needed for LTOCodeGenerator static cl::opt @@ -46,12 +45,12 @@ static bool parsedOptions = false; // Initialize the configured targets if they have not been initialized. static void lto_initialize() { if (!initialized) { - LLVMInitializeAllTargetInfos(); - LLVMInitializeAllTargets(); - LLVMInitializeAllTargetMCs(); - LLVMInitializeAllAsmParsers(); - LLVMInitializeAllAsmPrinters(); - LLVMInitializeAllDisassemblers(); + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + InitializeAllDisassemblers(); initialized = true; } } diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt index 51197231f2b5..0f214f3d0c80 100644 --- a/unittests/ADT/CMakeLists.txt +++ b/unittests/ADT/CMakeLists.txt @@ -23,7 +23,6 @@ set(ADTSources MakeUniqueTest.cpp MapVectorTest.cpp OptionalTest.cpp - OwningPtrTest.cpp PackedVectorTest.cpp PointerIntPairTest.cpp PointerUnionTest.cpp diff --git a/unittests/ADT/OwningPtrTest.cpp b/unittests/ADT/OwningPtrTest.cpp deleted file mode 100644 index d83a9471cc56..000000000000 --- a/unittests/ADT/OwningPtrTest.cpp +++ /dev/null @@ -1,273 +0,0 @@ -//===- llvm/unittest/ADT/OwningPtrTest.cpp - OwningPtr unit tests ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is 
distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/OwningPtr.h" -#include "gtest/gtest.h" -using namespace llvm; - -namespace { - -struct TrackDestructor { - static unsigned Destructions; - int val; - explicit TrackDestructor(int val) : val(val) {} - ~TrackDestructor() { ++Destructions; } - static void ResetCounts() { Destructions = 0; } - -private: - TrackDestructor(const TrackDestructor &other) LLVM_DELETED_FUNCTION; - TrackDestructor & - operator=(const TrackDestructor &other) LLVM_DELETED_FUNCTION; - TrackDestructor(TrackDestructor &&other) LLVM_DELETED_FUNCTION; - TrackDestructor &operator=(TrackDestructor &&other) LLVM_DELETED_FUNCTION; -}; - -unsigned TrackDestructor::Destructions = 0; - -// Test fixture -class OwningPtrTest : public testing::Test {}; - -TEST_F(OwningPtrTest, DefaultConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr O; - EXPECT_FALSE(O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - } - EXPECT_EQ(0u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, PtrConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr O(new TrackDestructor(3)); - EXPECT_TRUE((bool)O); - EXPECT_FALSE(!O); - EXPECT_TRUE(O.get()); - EXPECT_TRUE(O.isValid()); - EXPECT_EQ(3, (*O).val); - EXPECT_EQ(3, O->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Reset) { - TrackDestructor::ResetCounts(); - OwningPtr O(new TrackDestructor(3)); - EXPECT_EQ(0u, TrackDestructor::Destructions); - O.reset(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Take) { - TrackDestructor::ResetCounts(); - TrackDestructor *T = nullptr; - { - OwningPtr O(new 
TrackDestructor(3)); - T = O.take(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_TRUE(T); - EXPECT_EQ(3, T->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - delete T; - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Release) { - TrackDestructor::ResetCounts(); - TrackDestructor *T = nullptr; - { - OwningPtr O(new TrackDestructor(3)); - T = O.release(); - EXPECT_FALSE((bool)O); - EXPECT_TRUE(!O); - EXPECT_FALSE(O.get()); - EXPECT_FALSE(O.isValid()); - EXPECT_TRUE(T); - EXPECT_EQ(3, T->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - delete T; - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, MoveConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B = std::move(A); - EXPECT_FALSE((bool)A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, MoveAssignment) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(1u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, Swap) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B.swap(A); - EXPECT_TRUE((bool)A); - EXPECT_FALSE(!A); - EXPECT_TRUE(A.get()); - EXPECT_TRUE(A.isValid()); - 
EXPECT_EQ(4, (*A).val); - EXPECT_EQ(4, A->val); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - swap(A, B); - EXPECT_TRUE((bool)A); - EXPECT_FALSE(!A); - EXPECT_TRUE(A.get()); - EXPECT_TRUE(A.isValid()); - EXPECT_EQ(4, (*A).val); - EXPECT_EQ(4, A->val); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, UniqueToOwningConstruction) { - TrackDestructor::ResetCounts(); - { - std::unique_ptr A(new TrackDestructor(3)); - OwningPtr B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, UniqueToOwningAssignment) { - TrackDestructor::ResetCounts(); - { - std::unique_ptr A(new TrackDestructor(3)); - OwningPtr B(new TrackDestructor(4)); - B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_TRUE(B.isValid()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(1u, TrackDestructor::Destructions); - } - EXPECT_EQ(2u, TrackDestructor::Destructions); -} - -TEST_F(OwningPtrTest, TakeUniqueConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - std::unique_ptr B = A.take_unique(); - EXPECT_FALSE(A); - 
EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} - -#if LLVM_HAS_RVALUE_REFERENCE_THIS -TEST_F(OwningPtrTest, OwningToUniqueConstruction) { - TrackDestructor::ResetCounts(); - { - OwningPtr A(new TrackDestructor(3)); - std::unique_ptr B = std::move(A); - EXPECT_FALSE(A); - EXPECT_TRUE(!A); - EXPECT_FALSE(A.get()); - EXPECT_FALSE(A.isValid()); - EXPECT_TRUE((bool)B); - EXPECT_FALSE(!B); - EXPECT_TRUE(B.get()); - EXPECT_EQ(3, (*B).val); - EXPECT_EQ(3, B->val); - EXPECT_EQ(0u, TrackDestructor::Destructions); - } - EXPECT_EQ(1u, TrackDestructor::Destructions); -} -#endif -} diff --git a/unittests/IR/ValueMapTest.cpp b/unittests/IR/ValueMapTest.cpp index 248740aeb726..51acd2fc641a 100644 --- a/unittests/IR/ValueMapTest.cpp +++ b/unittests/IR/ValueMapTest.cpp @@ -186,19 +186,19 @@ struct LockMutex : ValueMapConfig { }; static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) { *Data.CalledRAUW = true; - EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; } static void onDelete(const ExtraData &Data, KeyT Old) { *Data.CalledDeleted = true; - EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked."; + EXPECT_FALSE(Data.M->try_lock()) << "Mutex should already be locked."; } static MutexT *getMutex(const ExtraData &Data) { return Data.M; } }; #if LLVM_ENABLE_THREADS TYPED_TEST(ValueMapTest, LocksMutex) { - sys::Mutex M(false); // Not recursive. + std::mutex M; // Not recursive. 
bool CalledRAUW = false, CalledDeleted = false; - typedef LockMutex ConfigType; + typedef LockMutex ConfigType; typename ConfigType::ExtraData Data = {&M, &CalledRAUW, &CalledDeleted}; ValueMap VM(Data); VM[this->BitcastV.get()] = 7; diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index 0ea931033339..08096a4a179a 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -29,7 +29,9 @@ add_llvm_unittest(SupportTests ProcessTest.cpp ProgramTest.cpp RegexTest.cpp + ScaledNumberTest.cpp SourceMgrTest.cpp + StringPool.cpp SwapByteOrderTest.cpp ThreadLocalTest.cpp TimeValueTest.cpp diff --git a/unittests/Support/ConvertUTFTest.cpp b/unittests/Support/ConvertUTFTest.cpp index 3b71ed1b6a6e..16c9bebfde16 100644 --- a/unittests/Support/ConvertUTFTest.cpp +++ b/unittests/Support/ConvertUTFTest.cpp @@ -11,6 +11,7 @@ #include "gtest/gtest.h" #include #include +#include using namespace llvm; @@ -65,6 +66,39 @@ TEST(ConvertUTFTest, HasUTF16BOM) { EXPECT_FALSE(HasBOM); } +struct ConvertUTFResultContainer { + ConversionResult ErrorCode; + std::vector UnicodeScalars; + + ConvertUTFResultContainer(ConversionResult ErrorCode) + : ErrorCode(ErrorCode) {} + + ConvertUTFResultContainer + withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000, + unsigned US2 = 0x110000, unsigned US3 = 0x110000, + unsigned US4 = 0x110000, unsigned US5 = 0x110000, + unsigned US6 = 0x110000, unsigned US7 = 0x110000) { + ConvertUTFResultContainer Result(*this); + if (US0 != 0x110000) + Result.UnicodeScalars.push_back(US0); + if (US1 != 0x110000) + Result.UnicodeScalars.push_back(US1); + if (US2 != 0x110000) + Result.UnicodeScalars.push_back(US2); + if (US3 != 0x110000) + Result.UnicodeScalars.push_back(US3); + if (US4 != 0x110000) + Result.UnicodeScalars.push_back(US4); + if (US5 != 0x110000) + Result.UnicodeScalars.push_back(US5); + if (US6 != 0x110000) + Result.UnicodeScalars.push_back(US6); + if (US7 != 0x110000) + 
Result.UnicodeScalars.push_back(US7); + return Result; + } +}; + std::pair> ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { const UTF8 *SourceStart = reinterpret_cast(S.data()); @@ -73,17 +107,55 @@ ConvertUTF8ToUnicodeScalarsLenient(StringRef S) { std::vector Decoded(S.size(), 0); UTF32 *TargetStart = Decoded.data(); - auto Result = + auto ErrorCode = ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart, Decoded.data() + Decoded.size(), lenientConversion); Decoded.resize(TargetStart - Decoded.data()); - return std::make_pair(Result, Decoded); + return std::make_pair(ErrorCode, Decoded); +} + +std::pair> +ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { + const UTF8 *SourceStart = reinterpret_cast(S.data()); + + const UTF8 *SourceNext = SourceStart; + std::vector Decoded(S.size(), 0); + UTF32 *TargetStart = Decoded.data(); + + auto ErrorCode = ConvertUTF8toUTF32Partial( + &SourceNext, SourceStart + S.size(), &TargetStart, + Decoded.data() + Decoded.size(), lenientConversion); + + Decoded.resize(TargetStart - Decoded.data()); + + return std::make_pair(ErrorCode, Decoded); } -#define R0(RESULT) std::make_pair(RESULT, std::vector{}) -#define R(RESULT, ...) 
std::make_pair(RESULT, std::vector{ __VA_ARGS__ }) +::testing::AssertionResult +CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected, + StringRef S, bool Partial = false) { + ConversionResult ErrorCode; + std::vector Decoded; + if (!Partial) + std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S); + else + + std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S); + if (Expected.ErrorCode != ErrorCode) + return ::testing::AssertionFailure() << "Expected error code " + << Expected.ErrorCode << ", actual " + << ErrorCode; + + if (Expected.UnicodeScalars != Decoded) + return ::testing::AssertionFailure() + << "Expected lenient decoded result:\n" + << ::testing::PrintToString(Expected.UnicodeScalars) << "\n" + << "Actual result:\n" << ::testing::PrintToString(Decoded); + + return ::testing::AssertionSuccess(); +} TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { @@ -92,25 +164,27 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // // U+0041 LATIN CAPITAL LETTER A - EXPECT_EQ(R(conversionOK, 0x0041), - ConvertUTF8ToUnicodeScalarsLenient("\x41")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41")); // // 2-byte sequences // // U+0283 LATIN SMALL LETTER ESH - EXPECT_EQ(R(conversionOK, 0x0283), - ConvertUTF8ToUnicodeScalarsLenient("\xca\x83")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0283), + "\xca\x83")); // U+03BA GREEK SMALL LETTER KAPPA // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA // U+03C3 GREEK SMALL LETTER SIGMA // U+03BC GREEK SMALL LETTER MU // U+03B5 GREEK SMALL LETTER EPSILON - EXPECT_EQ(R(conversionOK, 0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), - ConvertUTF8ToUnicodeScalarsLenient( - "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK) + .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5), + 
"\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5")); // // 3-byte sequences @@ -118,13 +192,15 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B // U+6587 CJK UNIFIED IDEOGRAPH-6587 - EXPECT_EQ(R(conversionOK, 0x4f8b, 0x6587), - ConvertUTF8ToUnicodeScalarsLenient("\xe4\xbe\x8b\xe6\x96\x87")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587), + "\xe4\xbe\x8b\xe6\x96\x87")); // U+D55C HANGUL SYLLABLE HAN // U+AE00 HANGUL SYLLABLE GEUL - EXPECT_EQ(R(conversionOK, 0xd55c, 0xae00), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x95\x9c\xea\xb8\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00), + "\xed\x95\x9c\xea\xb8\x80")); // U+1112 HANGUL CHOSEONG HIEUH // U+1161 HANGUL JUNGSEONG A @@ -132,98 +208,122 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // U+1100 HANGUL CHOSEONG KIYEOK // U+1173 HANGUL JUNGSEONG EU // U+11AF HANGUL JONGSEONG RIEUL - EXPECT_EQ(R(conversionOK, 0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" - "\xe1\x86\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK) + .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af), + "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3" + "\xe1\x86\xaf")); // // 4-byte sequences // // U+E0100 VARIATION SELECTOR-17 - EXPECT_EQ(R(conversionOK, 0x000E0100), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xa0\x84\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100), + "\xf3\xa0\x84\x80")); // // First possible sequence of a certain length // // U+0000 NULL - EXPECT_EQ(R(conversionOK, 0x0000), - ConvertUTF8ToUnicodeScalarsLenient(StringRef("\x00", 1))); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(conversionOK).withScalars(0x0000), + StringRef("\x00", 1))); // U+0080 PADDING CHARACTER - EXPECT_EQ(R(conversionOK, 0x0080), - ConvertUTF8ToUnicodeScalarsLenient("\xc2\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0080), + "\xc2\x80")); // U+0800 SAMARITAN LETTER ALAF - EXPECT_EQ(R(conversionOK, 0x0800), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0800), + "\xe0\xa0\x80")); // U+10000 LINEAR B SYLLABLE B008 A - EXPECT_EQ(R(conversionOK, 0x10000), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10000), + "\xf0\x90\x80\x80")); // U+200000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80\x80\x80")); // U+4000000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80\x80\x80")); // // Last possible sequence of a certain length // // U+007F DELETE - EXPECT_EQ(R(conversionOK, 0x007f), - ConvertUTF8ToUnicodeScalarsLenient("\x7f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f")); // U+07FF (unassigned) - EXPECT_EQ(R(conversionOK, 0x07ff), - ConvertUTF8ToUnicodeScalarsLenient("\xdf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(conversionOK).withScalars(0x07ff), + "\xdf\xbf")); // U+FFFF (noncharacter) - EXPECT_EQ(R(conversionOK, 0xffff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffff), + "\xef\xbf\xbf")); // U+1FFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf7\xbf\xbf\xbf")); // U+3FFFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf\xbf\xbf")); // U+7FFFFFFF (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf\xbf\xbf")); // // Other boundary conditions // // U+D7FF (unassigned) - EXPECT_EQ(R(conversionOK, 0xd7ff), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x9f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff), + "\xed\x9f\xbf")); // U+E000 (private use) - EXPECT_EQ(R(conversionOK, 0xe000), - ConvertUTF8ToUnicodeScalarsLenient("\xee\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xe000), + "\xee\x80\x80")); // U+FFFD REPLACEMENT CHARACTER - EXPECT_EQ(R(conversionOK, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbd")); + 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffd), + "\xef\xbf\xbd")); // U+10FFFF (noncharacter) - EXPECT_EQ(R(conversionOK, 0x10ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), + "\xf4\x8f\xbf\xbf")); // U+110000 (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf4\x90\x80\x80")); // // Unexpected continuation bytes @@ -232,407 +332,570 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // A sequence of unexpected continuation bytes that don't follow a first // byte, every byte is a maximal subpart. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xbf\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x80\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\x80\xbf\x82\xbf\xaa")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xaa\xb0\xbb\xbf\xaa\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd), - 
ConvertUTF8ToUnicodeScalarsLenient("\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\x80\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xbf\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x80\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\xbf\x82\xbf\xaa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xaa\xb0\xbb\xbf\xaa\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xaa\xb0\xbb\xbf\xaa\xa0\x8f")); // All continuation bytes (0x80--0xbf). 
- EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf")); // // Lonely start bytes // // Start bytes of 2-byte sequences (0xc0--0xdf). 
- EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" - "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" - "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" - "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) 
+ .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20" + "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20" + "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20" + "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20")); // Start bytes of 3-byte sequences (0xe0--0xef). - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" - "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + 
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20" + "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20")); // Start bytes of 4-byte sequences (0xf0--0xf7). - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient( - "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20")); // Start bytes of 5-byte sequences (0xf8--0xfb). 
- EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xf9\xfa\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\xf9\xfa\xfb")); - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xf8\x20\xf9\x20\xfa\x20\xfb\x20")); // Start bytes of 6-byte sequences (0xfc--0xfd). - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xfd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\xfd")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x20\xfd\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020), + "\xfc\x20\xfd\x20")); // // Other bytes (0xc0--0xc1, 0xfe--0xff). 
// - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xc1\xfe\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe\xfe\xff\xff")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfe\x80\x80\x80\x80\x80")); - - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xff\x80\x80\x80\x80\x80")); - - EXPECT_EQ(R(sourceIllegal, - 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\x20\xc1\x20\xfe\x20\xff\x20")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0\xc1\xfe\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfe\xfe\xff\xff")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 
0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfe\x80\x80\x80\x80\x80")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xff\x80\x80\x80\x80\x80")); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020), + "\xc0\x20\xc1\x20\xfe\x20\xff\x20")); // // Sequences with one continuation byte missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc2")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xdf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xec\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\x9f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xee\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf")); + 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe0\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xe1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xec\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xed\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xed\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xee\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xef\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\x90\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf1\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf3\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x8f\xbf")); // Overlong sequences with one trailing byte missing. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x9f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xc0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xc1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xe0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xe0\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80")); + 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80")); // Sequences that represent surrogates with one trailing byte missing. // High surrogates - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xac")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xaf")); // Low surrogates - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb4")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xb0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xb4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xed\xbf")); // Ill-formed 4-byte sequences. 
// 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+1100xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf4\x90\x80")); // U+13FBxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf4\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf5\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf6\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf7\x80\x80")); // U+1FFBxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf7\xbf\xbf")); // Ill-formed 5-byte sequences. 
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+2000xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf9\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfa\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\x80\x80\x80")); // U+3FFFFxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf\xbf")); // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx // U+40000xx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80\x80\x80")); // U+7FFFFFxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf\xbf")); // // Sequences with two continuation bytes missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x90")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf3\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), + "\xf4\x8f")); // Overlong sequences with two trailing byte missing. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf0\x8f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80")); // 
Sequences that represent surrogates with two trailing bytes missing. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed")); // Ill-formed 4-byte sequences. // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+110yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf4\x90")); // U+13Fyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf4\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf5\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf6\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf7\x80")); // U+1FFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf7\xbf")); // Ill-formed 5-byte sequences. 
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+200yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\x88\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf8\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xf9\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfa\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfb\x80\x80")); // U+3FFFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfb\xbf\xbf")); // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+4000yxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80\x80")); // U+7FFFFyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf\xbf")); // // Sequences with three continuation bytes missing // - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf1")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf2")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf3")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1")); + 
EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4")); // Broken overlong sequences. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80")); // Ill-formed 4-byte sequences. // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx // U+14yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf5")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6")); // U+1Cyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7")); // Ill-formed 5-byte sequences. 
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+20yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x88")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\x88")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf8\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xf9\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfa\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfb\x80")); // U+3FCyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfb\xbf")); // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+400yyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\x84\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfc\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfd\x80\x80")); // U+7FFCyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd), + "\xfd\xbf\xbf")); // // Sequences with four continuation bytes missing @@ -641,36 +904,41 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // Ill-formed 5-byte sequences. 
// 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf9")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfa")); - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); // U+3zyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb")); // Broken overlong sequences. - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\x80")); // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x84")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\x84")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfc\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfd\x80")); // U+7Fzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xfd\xbf")); // // Sequences with five continuation bytes missing @@ -679,89 +947,124 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // Ill-formed 6-byte sequences. 
// 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx // U+uzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc")); // U+uuzzyyxx (invalid) - EXPECT_EQ(R(sourceIllegal, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd")); // // Consecutive sequences with trailing bytes missing // - EXPECT_EQ(R(sourceIllegal, - 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, /**/ - 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, /**/ - 0xfffd, 0xfffd, 0xfffd, 0xfffd, - 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient( - "\xc0" "\xe0\x80" "\xf0\x80\x80" - "\xf8\x80\x80\x80" - "\xfc\x80\x80\x80\x80" - "\xdf" "\xef\xbf" "\xf7\xbf\xbf" - "\xfb\xbf\xbf\xbf" - "\xfd\xbf\xbf\xbf\xbf")); - + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xc0" "\xe0\x80" "\xf0\x80\x80" + "\xf8\x80\x80\x80" + "\xfc\x80\x80\x80\x80" + "\xdf" "\xef\xbf" "\xf7\xbf\xbf" + "\xfb\xbf\xbf\xbf" + "\xfd\xbf\xbf\xbf\xbf")); // // Overlong UTF-8 sequences // // U+002F SOLIDUS - EXPECT_EQ(R(conversionOK, 0x002f), - ConvertUTF8ToUnicodeScalarsLenient("\x2f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f")); // Overlong sequences of the above. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80\xaf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80\xaf")); // U+0000 NULL - EXPECT_EQ(R(conversionOK, 0x0000), - ConvertUTF8ToUnicodeScalarsLenient(StringRef("\x00", 1))); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0000), + StringRef("\x00", 1))); // Overlong sequences of the above. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x80\x80\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x80\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x80\x80\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x80\x80\x80\x80\x80")); // Other overlong sequences. 
- EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc0\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xc1\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xe0\x9f\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\x80\x80")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x8f\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xf8\x87\xbf\xbf\xbf")); - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xfc\x83\xbf\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc0\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc1\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd), + "\xc1\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xe0\x9f\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xbf\xbf")); 
+ EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\x80\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf0\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xf8\x87\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xfc\x83\xbf\xbf\xbf\xbf")); // // Isolated surrogates @@ -780,68 +1083,98 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // High surrogates // U+D800 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80")); // U+DB40 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0")); // U+DBFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf")); // Low surrogates // U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xb0\x80")); // U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - 
ConvertUTF8ToUnicodeScalarsLenient("\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xb4\x80")); // U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd), + "\xed\xbf\xbf")); // Surrogate pairs // U+D800 U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xb0\x80")); // U+D800 U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xb4\x80")); // U+D800 U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xa0\x80\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xa0\x80\xed\xbf\xbf")); // U+DB40 U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xb0\x80")); // U+DB40 U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - 
ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xb4\x80")); // U+DB40 U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xac\xa0\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xac\xa0\xed\xbf\xbf")); // U+DBFF U+DC00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xb0\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xb0\x80")); // U+DBFF U+DD00 - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xb4\x80")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xb4\x80")); // U+DBFF U+DFFF - EXPECT_EQ(R(sourceIllegal, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), - ConvertUTF8ToUnicodeScalarsLenient("\xed\xaf\xbf\xed\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceIllegal) + .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd), + "\xed\xaf\xbf\xed\xbf\xbf")); // // Noncharacters @@ -855,397 +1188,477 @@ TEST(ConvertUTFTest, UTF8ToUTF32Lenient) { // and the values U+FDD0..U+FDEF. 
// U+FFFE - EXPECT_EQ(R(conversionOK, 0xfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffe), + "\xef\xbf\xbe")); // U+FFFF - EXPECT_EQ(R(conversionOK, 0xffff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffff), + "\xef\xbf\xbf")); // U+1FFFE - EXPECT_EQ(R(conversionOK, 0x1fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe), + "\xf0\x9f\xbf\xbe")); // U+1FFFF - EXPECT_EQ(R(conversionOK, 0x1ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff), + "\xf0\x9f\xbf\xbf")); // U+2FFFE - EXPECT_EQ(R(conversionOK, 0x2fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe), + "\xf0\xaf\xbf\xbe")); // U+2FFFF - EXPECT_EQ(R(conversionOK, 0x2ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff), + "\xf0\xaf\xbf\xbf")); // U+3FFFE - EXPECT_EQ(R(conversionOK, 0x3fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe), + "\xf0\xbf\xbf\xbe")); // U+3FFFF - EXPECT_EQ(R(conversionOK, 0x3ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf0\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff), + "\xf0\xbf\xbf\xbf")); // U+4FFFE - EXPECT_EQ(R(conversionOK, 0x4fffe), - 
ConvertUTF8ToUnicodeScalarsLenient("\xf1\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe), + "\xf1\x8f\xbf\xbe")); // U+4FFFF - EXPECT_EQ(R(conversionOK, 0x4ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff), + "\xf1\x8f\xbf\xbf")); // U+5FFFE - EXPECT_EQ(R(conversionOK, 0x5fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe), + "\xf1\x9f\xbf\xbe")); // U+5FFFF - EXPECT_EQ(R(conversionOK, 0x5ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff), + "\xf1\x9f\xbf\xbf")); // U+6FFFE - EXPECT_EQ(R(conversionOK, 0x6fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe), + "\xf1\xaf\xbf\xbe")); // U+6FFFF - EXPECT_EQ(R(conversionOK, 0x6ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff), + "\xf1\xaf\xbf\xbf")); // U+7FFFE - EXPECT_EQ(R(conversionOK, 0x7fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe), + "\xf1\xbf\xbf\xbe")); // U+7FFFF - EXPECT_EQ(R(conversionOK, 0x7ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf1\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff), + "\xf1\xbf\xbf\xbf")); // U+8FFFE - EXPECT_EQ(R(conversionOK, 0x8fffe), - 
ConvertUTF8ToUnicodeScalarsLenient("\xf2\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe), + "\xf2\x8f\xbf\xbe")); // U+8FFFF - EXPECT_EQ(R(conversionOK, 0x8ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff), + "\xf2\x8f\xbf\xbf")); // U+9FFFE - EXPECT_EQ(R(conversionOK, 0x9fffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe), + "\xf2\x9f\xbf\xbe")); // U+9FFFF - EXPECT_EQ(R(conversionOK, 0x9ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff), + "\xf2\x9f\xbf\xbf")); // U+AFFFE - EXPECT_EQ(R(conversionOK, 0xafffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xafffe), + "\xf2\xaf\xbf\xbe")); // U+AFFFF - EXPECT_EQ(R(conversionOK, 0xaffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xaffff), + "\xf2\xaf\xbf\xbf")); // U+BFFFE - EXPECT_EQ(R(conversionOK, 0xbfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe), + "\xf2\xbf\xbf\xbe")); // U+BFFFF - EXPECT_EQ(R(conversionOK, 0xbffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf2\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xbffff), + "\xf2\xbf\xbf\xbf")); // U+CFFFE - EXPECT_EQ(R(conversionOK, 0xcfffe), - 
ConvertUTF8ToUnicodeScalarsLenient("\xf3\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe), + "\xf3\x8f\xbf\xbe")); // U+CFFFF - EXPECT_EQ(R(conversionOK, 0xcfffF), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF), + "\xf3\x8f\xbf\xbf")); // U+DFFFE - EXPECT_EQ(R(conversionOK, 0xdfffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x9f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe), + "\xf3\x9f\xbf\xbe")); // U+DFFFF - EXPECT_EQ(R(conversionOK, 0xdffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\x9f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xdffff), + "\xf3\x9f\xbf\xbf")); // U+EFFFE - EXPECT_EQ(R(conversionOK, 0xefffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xaf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xefffe), + "\xf3\xaf\xbf\xbe")); // U+EFFFF - EXPECT_EQ(R(conversionOK, 0xeffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xaf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xeffff), + "\xf3\xaf\xbf\xbf")); // U+FFFFE - EXPECT_EQ(R(conversionOK, 0xffffe), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xffffe), + "\xf3\xbf\xbf\xbe")); // U+FFFFF - EXPECT_EQ(R(conversionOK, 0xfffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf3\xbf\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfffff), + "\xf3\xbf\xbf\xbf")); // U+10FFFE - EXPECT_EQ(R(conversionOK, 0x10fffe), - 
ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe), + "\xf4\x8f\xbf\xbe")); // U+10FFFF - EXPECT_EQ(R(conversionOK, 0x10ffff), - ConvertUTF8ToUnicodeScalarsLenient("\xf4\x8f\xbf\xbf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff), + "\xf4\x8f\xbf\xbf")); // U+FDD0 - EXPECT_EQ(R(conversionOK, 0xfdd0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x90")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0), + "\xef\xb7\x90")); // U+FDD1 - EXPECT_EQ(R(conversionOK, 0xfdd1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x91")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1), + "\xef\xb7\x91")); // U+FDD2 - EXPECT_EQ(R(conversionOK, 0xfdd2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x92")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2), + "\xef\xb7\x92")); // U+FDD3 - EXPECT_EQ(R(conversionOK, 0xfdd3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x93")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3), + "\xef\xb7\x93")); // U+FDD4 - EXPECT_EQ(R(conversionOK, 0xfdd4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x94")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4), + "\xef\xb7\x94")); // U+FDD5 - EXPECT_EQ(R(conversionOK, 0xfdd5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x95")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5), + "\xef\xb7\x95")); // U+FDD6 - EXPECT_EQ(R(conversionOK, 0xfdd6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x96")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6), + "\xef\xb7\x96")); // U+FDD7 - EXPECT_EQ(R(conversionOK, 0xfdd7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x97")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7), + "\xef\xb7\x97")); // U+FDD8 - EXPECT_EQ(R(conversionOK, 0xfdd8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x98")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8), + "\xef\xb7\x98")); // U+FDD9 - EXPECT_EQ(R(conversionOK, 0xfdd9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x99")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9), + "\xef\xb7\x99")); // U+FDDA - EXPECT_EQ(R(conversionOK, 0xfdda), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9a")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdda), + "\xef\xb7\x9a")); // U+FDDB - EXPECT_EQ(R(conversionOK, 0xfddb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9b")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddb), + "\xef\xb7\x9b")); // U+FDDC - EXPECT_EQ(R(conversionOK, 0xfddc), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9c")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddc), + "\xef\xb7\x9c")); // U+FDDD - EXPECT_EQ(R(conversionOK, 0xfddd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9d")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddd), + "\xef\xb7\x9d")); // U+FDDE - EXPECT_EQ(R(conversionOK, 0xfdde), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9e")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdde), + "\xef\xb7\x9e")); // U+FDDF - EXPECT_EQ(R(conversionOK, 0xfddf), - 
ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\x9f")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfddf), + "\xef\xb7\x9f")); // U+FDE0 - EXPECT_EQ(R(conversionOK, 0xfde0), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde0), + "\xef\xb7\xa0")); // U+FDE1 - EXPECT_EQ(R(conversionOK, 0xfde1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde1), + "\xef\xb7\xa1")); // U+FDE2 - EXPECT_EQ(R(conversionOK, 0xfde2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde2), + "\xef\xb7\xa2")); // U+FDE3 - EXPECT_EQ(R(conversionOK, 0xfde3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde3), + "\xef\xb7\xa3")); // U+FDE4 - EXPECT_EQ(R(conversionOK, 0xfde4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde4), + "\xef\xb7\xa4")); // U+FDE5 - EXPECT_EQ(R(conversionOK, 0xfde5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde5), + "\xef\xb7\xa5")); // U+FDE6 - EXPECT_EQ(R(conversionOK, 0xfde6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde6), + "\xef\xb7\xa6")); // U+FDE7 - EXPECT_EQ(R(conversionOK, 0xfde7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(conversionOK).withScalars(0xfde7), + "\xef\xb7\xa7")); // U+FDE8 - EXPECT_EQ(R(conversionOK, 0xfde8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde8), + "\xef\xb7\xa8")); // U+FDE9 - EXPECT_EQ(R(conversionOK, 0xfde9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xa9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfde9), + "\xef\xb7\xa9")); // U+FDEA - EXPECT_EQ(R(conversionOK, 0xfdea), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xaa")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdea), + "\xef\xb7\xaa")); // U+FDEB - EXPECT_EQ(R(conversionOK, 0xfdeb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xab")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb), + "\xef\xb7\xab")); // U+FDEC - EXPECT_EQ(R(conversionOK, 0xfdec), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xac")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdec), + "\xef\xb7\xac")); // U+FDED - EXPECT_EQ(R(conversionOK, 0xfded), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xad")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfded), + "\xef\xb7\xad")); // U+FDEE - EXPECT_EQ(R(conversionOK, 0xfdee), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xae")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdee), + "\xef\xb7\xae")); // U+FDEF - EXPECT_EQ(R(conversionOK, 0xfdef), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xaf")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdef), + "\xef\xb7\xaf")); // U+FDF0 - EXPECT_EQ(R(conversionOK, 0xfdf0), - 
ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb0")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0), + "\xef\xb7\xb0")); // U+FDF1 - EXPECT_EQ(R(conversionOK, 0xfdf1), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb1")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1), + "\xef\xb7\xb1")); // U+FDF2 - EXPECT_EQ(R(conversionOK, 0xfdf2), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2), + "\xef\xb7\xb2")); // U+FDF3 - EXPECT_EQ(R(conversionOK, 0xfdf3), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb3")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3), + "\xef\xb7\xb3")); // U+FDF4 - EXPECT_EQ(R(conversionOK, 0xfdf4), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb4")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4), + "\xef\xb7\xb4")); // U+FDF5 - EXPECT_EQ(R(conversionOK, 0xfdf5), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb5")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5), + "\xef\xb7\xb5")); // U+FDF6 - EXPECT_EQ(R(conversionOK, 0xfdf6), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb6")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6), + "\xef\xb7\xb6")); // U+FDF7 - EXPECT_EQ(R(conversionOK, 0xfdf7), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb7")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7), + "\xef\xb7\xb7")); // U+FDF8 - EXPECT_EQ(R(conversionOK, 0xfdf8), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb8")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8), + "\xef\xb7\xb8")); // U+FDF9 - EXPECT_EQ(R(conversionOK, 0xfdf9), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xb9")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9), + "\xef\xb7\xb9")); // U+FDFA - EXPECT_EQ(R(conversionOK, 0xfdfa), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xba")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa), + "\xef\xb7\xba")); // U+FDFB - EXPECT_EQ(R(conversionOK, 0xfdfb), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbb")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb), + "\xef\xb7\xbb")); // U+FDFC - EXPECT_EQ(R(conversionOK, 0xfdfc), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbc")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc), + "\xef\xb7\xbc")); // U+FDFD - EXPECT_EQ(R(conversionOK, 0xfdfd), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbd")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd), + "\xef\xb7\xbd")); // U+FDFE - EXPECT_EQ(R(conversionOK, 0xfdfe), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbe")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe), + "\xef\xb7\xbe")); // U+FDFF - EXPECT_EQ(R(conversionOK, 0xfdff), - ConvertUTF8ToUnicodeScalarsLenient("\xef\xb7\xbf")); -} - -std::pair> -ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) { - const UTF8 *SourceStart = reinterpret_cast(S.data()); - - const UTF8 *SourceNext = SourceStart; - std::vector Decoded(S.size(), 0); - UTF32 *TargetStart = Decoded.data(); - - auto Result = ConvertUTF8toUTF32Partial( - &SourceNext, SourceStart + S.size(), &TargetStart, - Decoded.data() + Decoded.size(), lenientConversion); - - 
Decoded.resize(TargetStart - Decoded.data()); - - return std::make_pair(Result, Decoded); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0xfdff), + "\xef\xb7\xbf")); } TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) { // U+0041 LATIN CAPITAL LETTER A - EXPECT_EQ(R(conversionOK, 0x0041), - ConvertUTF8ToUnicodeScalarsPartialLenient("\x41")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(conversionOK).withScalars(0x0041), + "\x41", true)); // // Sequences with one continuation byte missing // - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xc2")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xdf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe0\xa0")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe0\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xe1\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xec\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xed\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xed\x9f")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xee\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xef\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf0\x90\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf0\xbf\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf1\x80\x80")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf3\xbf\xbf")); - EXPECT_EQ(R0(sourceExhausted), - ConvertUTF8ToUnicodeScalarsPartialLenient("\xf4\x80\x80")); - EXPECT_EQ(R0(sourceExhausted), - 
ConvertUTF8ToUnicodeScalarsPartialLenient("\xf4\x8f\xbf")); - - EXPECT_EQ(R(sourceExhausted, 0x0041), - ConvertUTF8ToUnicodeScalarsPartialLenient("\x41\xc2")); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xc2", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xdf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe0\xa0", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe0\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xe1\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xec\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xed\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xed\x9f", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xee\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xef\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf0\x90\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf0\xbf\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf1\x80\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf3\xbf\xbf", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted), + "\xf4\x80\x80", true)); + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + 
ConvertUTFResultContainer(sourceExhausted), + "\xf4\x8f\xbf", true)); + + EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars( + ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041), + "\x41\xc2", true)); } -#undef R0 -#undef R - diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp index ad2fc977e6d7..153884ba4298 100644 --- a/unittests/Support/ManagedStatic.cpp +++ b/unittests/Support/ManagedStatic.cpp @@ -47,7 +47,6 @@ TEST(Initialize, MultipleThreads) { void *p1 = test1::allocate_stack(a1); void *p2 = test1::allocate_stack(a2); - llvm_start_multithreaded(); pthread_t t1, t2; pthread_create(&t1, &a1, test1::helper, nullptr); pthread_create(&t2, &a2, test1::helper, nullptr); @@ -55,7 +54,6 @@ TEST(Initialize, MultipleThreads) { pthread_join(t2, nullptr); free(p1); free(p2); - llvm_stop_multithreaded(); } #endif diff --git a/unittests/Support/ScaledNumberTest.cpp b/unittests/Support/ScaledNumberTest.cpp new file mode 100644 index 000000000000..d1277d0479a6 --- /dev/null +++ b/unittests/Support/ScaledNumberTest.cpp @@ -0,0 +1,81 @@ +//===- llvm/unittest/Support/ScaledNumberTest.cpp - ScaledPair tests -----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ScaledNumber.h" + +#include "llvm/Support/DataTypes.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::ScaledNumbers; + +namespace { + +template struct ScaledPair { + UIntT D; + int S; + ScaledPair(const std::pair &F) : D(F.first), S(F.second) {} + ScaledPair(UIntT D, int S) : D(D), S(S) {} + + bool operator==(const ScaledPair &X) const { + return D == X.D && S == X.S; + } +}; +template +bool operator==(const std::pair &L, + const ScaledPair &R) { + return ScaledPair(L) == R; +} +template +void PrintTo(const ScaledPair &F, ::std::ostream *os) { + *os << F.D << "*2^" << F.S; +} + +typedef ScaledPair SP32; +typedef ScaledPair SP64; + +TEST(ScaledNumberHelpersTest, getRounded) { + EXPECT_EQ(getRounded32(0, 0, false), SP32(0, 0)); + EXPECT_EQ(getRounded32(0, 0, true), SP32(1, 0)); + EXPECT_EQ(getRounded32(20, 21, true), SP32(21, 21)); + EXPECT_EQ(getRounded32(UINT32_MAX, 0, false), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getRounded32(UINT32_MAX, 0, true), SP32(1 << 31, 1)); + + EXPECT_EQ(getRounded64(0, 0, false), SP64(0, 0)); + EXPECT_EQ(getRounded64(0, 0, true), SP64(1, 0)); + EXPECT_EQ(getRounded64(20, 21, true), SP64(21, 21)); + EXPECT_EQ(getRounded64(UINT32_MAX, 0, false), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getRounded64(UINT32_MAX, 0, true), SP64(UINT64_C(1) << 32, 0)); + EXPECT_EQ(getRounded64(UINT64_MAX, 0, false), SP64(UINT64_MAX, 0)); + EXPECT_EQ(getRounded64(UINT64_MAX, 0, true), SP64(UINT64_C(1) << 63, 1)); +} + +TEST(FloatsTest, getAdjusted) { + const uint64_t Max32In64 = UINT32_MAX; + EXPECT_EQ(getAdjusted32(0), SP32(0, 0)); + EXPECT_EQ(getAdjusted32(0, 5), SP32(0, 5)); + EXPECT_EQ(getAdjusted32(UINT32_MAX), SP32(UINT32_MAX, 0)); + EXPECT_EQ(getAdjusted32(Max32In64 << 1), SP32(UINT32_MAX, 1)); + EXPECT_EQ(getAdjusted32(Max32In64 << 1, 1), SP32(UINT32_MAX, 2)); + EXPECT_EQ(getAdjusted32(Max32In64 << 31), SP32(UINT32_MAX, 31)); + 
EXPECT_EQ(getAdjusted32(Max32In64 << 32), SP32(UINT32_MAX, 32)); + EXPECT_EQ(getAdjusted32(Max32In64 + 1), SP32(1u << 31, 1)); + EXPECT_EQ(getAdjusted32(UINT64_MAX), SP32(1u << 31, 33)); + + EXPECT_EQ(getAdjusted64(0), SP64(0, 0)); + EXPECT_EQ(getAdjusted64(0, 5), SP64(0, 5)); + EXPECT_EQ(getAdjusted64(UINT32_MAX), SP64(UINT32_MAX, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 1), SP64(Max32In64 << 1, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 1, 1), SP64(Max32In64 << 1, 1)); + EXPECT_EQ(getAdjusted64(Max32In64 << 31), SP64(Max32In64 << 31, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 << 32), SP64(Max32In64 << 32, 0)); + EXPECT_EQ(getAdjusted64(Max32In64 + 1), SP64(Max32In64 + 1, 0)); + EXPECT_EQ(getAdjusted64(UINT64_MAX), SP64(UINT64_MAX, 0)); +} +} diff --git a/unittests/Support/StringPool.cpp b/unittests/Support/StringPool.cpp new file mode 100644 index 000000000000..7b7805f91710 --- /dev/null +++ b/unittests/Support/StringPool.cpp @@ -0,0 +1,31 @@ +//===- llvm/unittest/Support/ThreadLocalTest.cpp - Therad Local tests ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringPool.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(PooledStringPtrTest, OperatorEquals) { + StringPool pool; + const PooledStringPtr a = pool.intern("a"); + const PooledStringPtr b = pool.intern("b"); + EXPECT_FALSE(a == b); +} + +TEST(PooledStringPtrTest, OperatorNotEquals) { + StringPool pool; + const PooledStringPtr a = pool.intern("a"); + const PooledStringPtr b = pool.intern("b"); + EXPECT_TRUE(a != b); +} + +} diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index f277608d2c80..feaa7c757962 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -26,7 +26,6 @@ add_tablegen(llvm-tblgen LLVM OptParserEmitter.cpp PseudoLoweringEmitter.cpp RegisterInfoEmitter.cpp - SetTheory.cpp SubtargetEmitter.cpp TableGen.cpp X86DisassemblerTables.cpp diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index 30732c8e04d8..278315ba47b3 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -15,7 +15,6 @@ #ifndef CODEGEN_REGISTERS_H #define CODEGEN_REGISTERS_H -#include "SetTheory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -23,6 +22,7 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" #include #include #include diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h index 65ac60207472..3fef8adf91e5 100644 --- a/utils/TableGen/CodeGenSchedule.h +++ b/utils/TableGen/CodeGenSchedule.h @@ -15,11 +15,11 @@ #ifndef CODEGEN_SCHEDULE_H #define CODEGEN_SCHEDULE_H -#include "SetTheory.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Record.h" +#include 
"llvm/TableGen/SetTheory.h" namespace llvm { diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 1927ad923699..5dba9514283d 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -380,7 +380,7 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int, case 3: TypeSig.push_back(IIT_STRUCT3); break; case 4: TypeSig.push_back(IIT_STRUCT4); break; case 5: TypeSig.push_back(IIT_STRUCT5); break; - default: assert(0 && "Unhandled case in struct"); + default: llvm_unreachable("Unhandled case in struct"); } for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i) diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 00c3a6fd91f6..bbd61f5fb112 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "TableGenBackends.h" // Declares all backends. -#include "SetTheory.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" using namespace llvm; diff --git a/utils/lit/lit/discovery.py b/utils/lit/lit/discovery.py index c3c0f283b558..876d4f31e950 100644 --- a/utils/lit/lit/discovery.py +++ b/utils/lit/lit/discovery.py @@ -200,9 +200,7 @@ def find_tests_for_inputs(lit_config, inputs): # Expand '@...' form in inputs. actual_inputs = [] for input in inputs: - if os.path.exists(input) or not input.startswith('@'): - actual_inputs.append(input) - else: + if input.startswith('@'): f = open(input[1:]) try: for ln in f: @@ -211,6 +209,8 @@ def find_tests_for_inputs(lit_config, inputs): actual_inputs.append(ln) finally: f.close() + else: + actual_inputs.append(input) # Load the tests from the inputs. 
tests = [] diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check index 3173027759b7..4db8426ace88 100755 --- a/utils/llvm-compilers-check +++ b/utils/llvm-compilers-check @@ -149,6 +149,10 @@ def add_options(parser): help=("Do not do installs")) parser.add_option("--keep-going", default=False, action="store_true", help=("Keep going after failures")) + parser.add_option("--no-flavor-prefix", default=False, action="store_true", + help=("Do not append the build flavor to the install path")) + parser.add_option("--enable-werror", default=False, action="store_true", + help=("Build with -Werror")) return def check_options(parser, options, valid_builds): @@ -346,7 +350,9 @@ class Builder(threading.Thread): ssabbrev = get_short_abbrevs([ab for ab in self.source_abbrev.values()]) prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + self.build_abbrev[build] + "]" - self.install_prefix += "/" + self.source_abbrev[source] + "/" + build + if (not self.options.no_flavor_prefix): + self.install_prefix += "/" + self.source_abbrev[source] + "/" + build + build_suffix += "/" + self.source_abbrev[source] + "/" + build self.logger = logging.getLogger(prefix) @@ -361,16 +367,13 @@ class Builder(threading.Thread): configure_flags = dict( llvm=dict(debug=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-assertions", "--disable-optimized", "--with-gcc-toolchain=" + cxxroot], release=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-optimized", "--with-gcc-toolchain=" + cxxroot], paranoid=["--prefix=" + self.install_prefix, - "--enable-werror", "--enable-assertions", "--enable-expensive-checks", "--disable-optimized", @@ -379,6 +382,11 @@ class Builder(threading.Thread): release=[], paranoid=[])) + if (self.options.enable_werror): + configure_flags["llvm"]["debug"].append("--enable-werror") + configure_flags["llvm"]["release"].append("--enable-werror") + configure_flags["llvm"]["paranoid"].append("--enable-werror") + configure_env = 
dict( llvm=dict(debug=dict(CC=self.cc, CXX=self.cxx), @@ -530,10 +538,6 @@ class Builder(threading.Thread): self.logger.info("[" + prefix + "] Configure failed, no configure script " + conf) return -1 - if not os.path.exists(mf): - self.logger.info("[" + prefix + "] Configure failed, no makefile " + mf) - return -1 - if os.path.exists(conf) and os.path.exists(mf): confstat = os.stat(conf) makestat = os.stat(mf) diff --git a/utils/llvm.natvis b/utils/llvm.natvis index 9874ce58d250..6da0b2512a92 100644 --- a/utils/llvm.natvis +++ b/utils/llvm.natvis @@ -108,14 +108,6 @@ or create a symbolic link so it updates automatically. - - empty - OwningPtr {*Ptr} - - Ptr - - - {{ [Small Mode] size={NumElements}, capacity={CurArraySize} }} {{ [Big Mode] size={NumElements}, capacity={CurArraySize} }}